##### R CODE TO REPLICATE THE STATISTICAL ANALYSES AND FIGURES OF
##### Adrian Lucardi, Juan Pablo Micozzi and Agustin Vallejo, "Does the Early Bird always Get the Worm? First Round Advantages and Second Round Victories in Latin America," Electoral Studies, 81, 2023
##### Last date: 2023-01-19


## emptying the workspace
rm (list=ls ()) ## removes every object returned by ls() so the script starts from a clean workspace


## saving the output to a log file
## (append=FALSE overwrites an existing log; split=FALSE sends output to the file only, not also to the console)
## the file path is relative to the working directory in effect here, i.e. before the setwd() call further down
sink (file="Log Analysis First Round LatAm.txt", append=FALSE, type="output", split=FALSE)


## loading & installing packages
## pacman is installed on the fly if it cannot be loaded; p_load() then takes care of
## every package used in the script
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  dplyr,
  ggcorrplot,
  ggplot2,
  ggpmisc,
  ggthemes,
  haven,
  lfe,
  lpdensity,
  lubridate,
  maps,
  RcppRoll,
  rdd,
  rddensity,
  rdlocrand,
  rdpower,
  rdrobust,
  readxl,
  sf,
  tidyr,
  tidyverse,
  viridis,
  wesanderson,
  xtable
)


## display options
options(
  digits = 4,
  scipen = 999, ## disable sci notation
  show.signif.stars = FALSE,
  max.print = 2000,
  tibble.width = Inf,
  tibble.print_max = Inf,
  tibble.print_min = 1
)


## setting the working directory --> replace this with path to your own working directory
## (`home` is reused later to build the path where figures and tables are exported)
home <- "~/Dropbox/Current projects/Second rounds paper/"
setwd(home)


## function for extracting values of interest from a RD model
extract_rd <- function (x) {
  ## Returns a character vector of length 5 with the pieces of the RD fit `x` reported in
  ## the tables: estimate, "[lower:upper]" interval, p-value, bandwidth and the two N_h
  ## counts separated by "$|$".
  ## NOTE(review): presumably `x` is an rdrobust object, in which case coef[1] is the
  ## conventional estimate while row 3 of ci/pv is the robust inference -- confirm.
  two_dec <- function (v) sprintf ("%.2f", v)
  interval <- str_c ("[", two_dec (x$ci[3,1]), ":", two_dec (x$ci[3,2]), "]")
  n_txt <- sprintf ("%.0f", x$N_h)
  c (
    two_dec (x$coef[1])
    , interval
    , two_dec (x$pv[3])
    , two_dec (x$bws[1,1])
    , str_c (n_txt[1], n_txt[2], sep="$|$")
  )
}


## working max() and min() functions, not the b.s. introduced in R 3.2.2
max_na <- function (x) {
  ## Maximum of `x` ignoring missing values; returns NA (rather than -Inf plus a
  ## warning) when `x` has no non-missing value, including when `x` is empty.
  if (!any (!is.na (x))) {
    return (NA)
  }
  max (x, na.rm=TRUE)
}
min_na <- function (x) {
  ## Minimum of `x` ignoring missing values; returns NA (rather than Inf plus a
  ## warning) when `x` has no non-missing value, including when `x` is empty.
  if (!any (!is.na (x))) {
    return (NA)
  }
  min (x, na.rm=TRUE)
}


## setting the graphical parameters
## rounding
digits <- 2 ## number of decimals to use

## colors
pal_movie <- "Zissou1"
(col_movie <- wes_palette (pal_movie)) ## see https://github.com/karthik/wesanderson for a list of palettes
col_bin <- col_movie[1]
col_line <- col_movie[5]
col_cutoff <- "black"
col_viridis <- "rocket"
gray_lines <- "gray65"

## transparency
alpha_dots <- 0.4
alpha_bin <- 1

## where to place text annotations
place_text_x <- -28.5
place_text_x_placebo <- -29.0
place_text_y <- 97
place_text_y2 <- 24
place_text_y3 <- 68
place_text_y_placebo <- 1.1
place_text_y_placebo2 <- 0.65

## sizes
size_bin <- 0.75
size_text <- 3.75
size_borders <- 0.015
size_legend <- 0.3

## number of replications (presumably for the rdlocrand routines -- confirm)
reps_dlocrand <- 10000

## default ggplot theme, with the base font size tied to size_text
theme_set (theme_bw (base_size=size_text*3))




#%%%%%%%%%%%%%%%%%%%%#
#### // 1 // DATA ####
#%%%%%%%%%%%%%%%%%%%%#

#### (1.1) Downloading ####

### (1.1.1) Main dataset ####

## (ALL candidates in ALL 2-round elections, even if there was no second round or none was needed) ####

## Reads the candidate-level file and derives, within each election, indicators of whether
## the candidates ranked 1st / 2nd in the first round were incumbents or experienced.
bfull <- read_csv ("data/Second rounds - Full dataset 2022-10-12.csv"
                   , guess_max=9999) %>% 
  mutate (
    sample = factor (sample, levels=c (
      "World", "Brazil (governor)", "Brazil (mayor)", "Argentina", "Bolivia", "Chile", "Mexico"))
    , office = factor (office)
    , date_r1 = ymd (date_r1)
    , date_r2 = ymd (date_r1 + date_dist) ## first-round date shifted by date_dist (presumably days b/w rounds -- confirm)
    
    ## incumbency and experience data
    ## (missing incumbency is treated as "not incumbent"; the *_first / *_second flags are
    ## candidate-level here and become election-level in the grouped mutate() below)
    , incParty = ifelse (is.na (incParty), 0, incParty)
    , incCandidate = ifelse (is.na (incCandidate), 0, incCandidate)
    , incCandidate_first = ifelse (rank_r1==1 & incCandidate==1, 1, 0)
    , incCandidate_second = ifelse (rank_r1==2 & incCandidate==1, 1, 0)
    , exp_bra_first = ifelse (rank_r1==1 & exp_bra==1, 1, 0)
    , exp_bra_second = ifelse (rank_r1==2 & exp_bra==1, 1, 0)
    , exp_bra2_first = ifelse (rank_r1==1 & exp_bra2==1, 1, 0)
    , exp_bra2_second = ifelse (rank_r1==2 & exp_bra2==1, 1, 0) ) %>% 
  ## one group = one election; everything below is computed within election
  group_by (sample, country, office, district, date_r1) %>% 
  mutate (
    ## taking the max over the election's candidates spreads the flag to every row of the election
    incCandidate_first = max_na (incCandidate_first)
    , incCandidate_second = max_na (incCandidate_second)
    , incCandidate_none = ifelse (incCandidate_first + incCandidate_second == 0, 1, 0)
    , incCandidate_any = ifelse (incCandidate_first + incCandidate_second == 1, 1, 0)
    , incStatus = case_when (
      incCandidate_first == 0 & incCandidate_second == 0 ~ "none"
      , incCandidate_first == 1 ~ "first"
      , incCandidate_second == 1 ~ "second")
    , incStatus = factor (incStatus, levels=c (
      "first", "second", "none"))
    
    ## experience (exp_bra): forced to NA outside the two Brazilian samples; the *_first2 /
    ## *_second2 columns are temporary election-level flags, dropped once the four mutually
    ## exclusive categories (none / both / first / second) have been built
    , exp_bra_first2 = ifelse (!sample %in% c ("Brazil (governor)", "Brazil (mayor)")
                               , NA, max_na (exp_bra_first))
    , exp_bra_second2 = ifelse (!sample %in% c ("Brazil (governor)", "Brazil (mayor)")
                                , NA, max_na (exp_bra_second))
    , exp_bra_none = ifelse (exp_bra_first2 == 0 & exp_bra_second2 == 0, 1, 0)
    , exp_bra_both = ifelse (exp_bra_first2 == 1 & exp_bra_second2 == 1, 1, 0)
    , exp_bra_first = ifelse (exp_bra_first2 == 1 & exp_bra_second2 == 0, 1, 0)
    , exp_bra_second = ifelse (exp_bra_first2 == 0 & exp_bra_second2 == 1, 1, 0)
    , expStatus = case_when (
      exp_bra_none == 1 ~ "none"
      , exp_bra_both == 1 ~ "both"
      , exp_bra_first == 1 ~ "first"
      , exp_bra_second == 1 ~ "second")
    , expStatus = factor (expStatus, levels=c (
      "both", "first", "second", "none"))
    , exp_bra_first2 = NULL
    , exp_bra_second2 = NULL
    
    ## same construction for the second experience measure (exp_bra2)
    , exp_bra2_first2 = ifelse (!sample %in% c ("Brazil (governor)", "Brazil (mayor)")
                                , NA, max_na (exp_bra2_first))
    , exp_bra2_second2 = ifelse (!sample %in% c ("Brazil (governor)", "Brazil (mayor)")
                                 , NA, max_na (exp_bra2_second))
    , exp_bra2_none = ifelse (exp_bra2_first2 == 0 & exp_bra2_second2 == 0, 1, 0)
    , exp_bra2_both = ifelse (exp_bra2_first2 == 1 & exp_bra2_second2 == 1, 1, 0)
    , exp_bra2_first = ifelse (exp_bra2_first2 == 1 & exp_bra2_second2 == 0, 1, 0)
    , exp_bra2_second = ifelse (exp_bra2_first2 == 0 & exp_bra2_second2 == 1, 1, 0)
    , expStatus2 = case_when (
      exp_bra2_none == 1 ~ "none"
      , exp_bra2_both == 1 ~ "both"
      , exp_bra2_first == 1 ~ "first"
      , exp_bra2_second == 1 ~ "second")
    , expStatus2 = factor (expStatus2
                           , levels=c ("both", "first", "second", "none"))
    , exp_bra2_first2 = NULL
    , exp_bra2_second2 = NULL)
## NOTE(review): there is no ungroup() here, so bfull stays grouped by election and later
## verbs applied to it run group-wise -- confirm this is intended
summary (bfull); nrow (bfull)



### (1.1.2) NELDA v6.0 ####
## Reads NELDA, repairs a few election ids and country names, extracts month/day from the
## election id, and keeps one row (country, year, month, day) per retained first-round election.
excel_sheets (path="data/NELDA.xls")
nelda <- read_excel (path="data/NELDA.xls"
                     , sheet="Sheet1", na=c ("", "na", "NA")) %>% 
  mutate (
    
    ## correcting a couple of mistakes
    electionid = case_when (
      electionid == "0395-2004-624-P1" ~ "0395-2004-0626-P1"
      , electionid == "781-2018-9023-P1" ~ "781-2018-0923-P1"
      , electionid == "160-2019-0811-P1" ~ "160-2019-1027-P1" ## Argentina 2019; wrong date
      , electionid == "484-1992-0802-P1" ~ "484-1992-0808-P1" ## (Republic of the) Congo 1992; wrong date
      , TRUE ~ electionid)
    
    ## updating country names (so that they match V-Dem and the main dataset, against which
    ## they are checked further down). The replacements are applied in the order listed: the
    ## "Congo" rule also hits the Democratic Republic, and the rule right after it repairs
    ## the doubled "Republic of" that results
    , country = str_replace_all (
      country, c (
        "Bosnia-Herzegovina" = "Bosnia and Herzegovina"
        , "Cote d'Ivoire" = "Ivory Coast"
        , "Congo" = "Republic of the Congo"
        , "Democratic Republic of Republic of the Congo" = "Democratic Republic of the Congo"
        , "East Timor" = "Timor-Leste"
        , "Gambia" = "The Gambia"
        , "Kyrgyz Republic" = "Kyrgyzstan"
        , "Macedonia \\(FYROM\\)" = "North Macedonia"
        , "Russia \\(Soviet Union\\)" = "Russia"
        , "Serbia \\(Yugoslavia\\)" = "Serbia"))
    
    ## getting the election date(s)
    ## (the id is split on "-": piece 4 is the round code, piece 3 a 4-character string that
    ## is split into single characters and re-assembled as a 2-digit month and a 2-digit day)
    , round = str_split_fixed (as.character (electionid), pattern="-", n=4)[,4]
    , date = str_split_fixed (as.character (electionid), pattern="-", n=4)[,3]
    , month1 = str_split_fixed (as.character (date), pattern="", n=4)[,1]
    , month2 = str_split_fixed (as.character (date), pattern="", n=4)[,2]
    , day1 = str_split_fixed (as.character (date), pattern="", n=4)[,3]
    , day2 = str_split_fixed (as.character (date), pattern="", n=4)[,4]
    , month = as.numeric (str_c (month1, month2, sep=""))
    , day = as.numeric (str_c (day1, day2, sep="")) ) %>% 
  ## keep executive first rounds ("P1") for which nelda3, nelda4 and nelda5 are all "yes"
  filter (
    types == "Executive" & nelda3 == "yes" & nelda4 == "yes" & nelda5 == "yes" & round == "P1"
    & !(country %in% c ("Abkhazia", "Kiribati", "Palau", "South Ossetia"))) %>% ## not in V-Dem
  select (country, year, month, day) ## warnings reported. Don't worry


## two elections are inexplicably absent. We add them:
## each missing election is created by copying an existing row of the same country and
## overwriting its date
tmp_cro <- nelda %>%
  filter(country == "Croatia" & year == 2019) %>%
  mutate(year = 2014, month = 12, day = 28)
tmp_geo <- nelda %>%
  filter(country == "Georgia" & year == 1995) %>%
  mutate(year = 1991, month = 5, day = 26)

## appending them and building the date and the country-year / country-date merge keys
nelda <- bind_rows(nelda, tmp_cro, tmp_geo) %>%
  mutate(
    compet_nelda = 1,
    date = ymd(str_c(year, month, day, sep = "-")),
    cyear = str_c(country, year, sep = "_"),
    cdate = str_c(country, date, sep = "_")
  ) %>%
  arrange(country, date)
summary(nelda)
nrow(nelda) ## 825 presidential elections, 1945-2020



### (1.1.3) V-Dem v11 ####
## country-year polyarchy scores from 1945 onwards, with a country_year merge key
vdem <- read_dta("data/V-Dem-CY-Core-v11.1.dta", encoding = "latin1") %>%
  mutate_if(is.labelled, as_factor) %>%
  select(country_name, year, v2x_polyarchy) %>%
  filter(year >= 1945) %>%
  mutate(cyear = str_c(country_name, year, sep = "_"))
summary(vdem)



#### (1.2) Creating the datasets we'll use ####

### (1.2.1) presidential elections around the world ####

# checking for discrepancies b/w NELDA and V-Dem
summary(unique(nelda$country) %in% unique(vdem$country_name)) ## no discrepancies. Good
summary(unique(nelda$cyear) %in% unique(vdem$cyear)) ## neither here

# joining (adds the polyarchy score of the election's country-year)
nelda <- nelda %>%
  left_join(
    vdem %>% select(cyear, v2x_polyarchy),
    by = "cyear"
  )
summary(nelda)

# list of elections from bfull:
## one row per election of the "World" sample, flagged as using a runoff rule (runoff = 1);
## reversion is the election-level max of the candidate-level variable, 0 when missing
btmp <- bfull %>%
  filter(sample == "World") %>%
  select(country, date_r1, year_r1, rounds, r2_needed, reversion) %>%
  group_by(country, date_r1, year_r1, rounds, r2_needed) %>%
  summarise(reversion = max_na(reversion)) %>%
  unique() %>%
  ungroup() %>%
  mutate(
    cyear = factor(str_c(country, year_r1, sep = "_")),
    cdate = factor(str_c(country, date_r1, sep = "_")),
    runoff = 1,
    reversion = ifelse(is.na(reversion), 0, reversion)
  )
summary(btmp)

# checking discrepancies
summary(unique(btmp$country) %in% unique(nelda$country)) ## no discrepancies. Good
summary(unique(nelda$country) %in% unique(btmp$country)) ## 24 discrepancies. They correspond to countries with elected presidents that never employed a runoff. Good
unique(nelda$country)[!(unique(nelda$country) %in% unique(btmp$country))]

# now the other way round
summary(sort(unique(btmp$cdate)) %in% sort(unique(nelda$cdate))) ## 28 discrepancies. They all correspond to either (a) pre-independence elections; or (b) not minimally competitive elections. Good:
sort(unique(btmp$cdate))[!(sort(unique(btmp$cdate)) %in% sort(unique(nelda$cdate)))]

# 5 were pre-independence
# Cyprus_1959
# Equatorial Guinea_1968
# Montenegro_2003
# Slovenia_1990
# Timor-Leste_2002

# 23 weren't minimally competitive:
# Burundi_2010
# Cape Verde_1996
# Djibouti_2005
# Egypt_2018
# Iran_1980
# Iran_1981 (x2)
# Iran_1985
# Iran_1989
# Iran_1993
# Iran_1997
# Kazakhstan_2015
# Madagascar_1965
# Madagascar_1982
# Madagascar_1989
# Maldives_2013
# Sao Tome and Principe_1991
# Sudan_1996
# Uzbekistan_2000
# Uzbekistan_2007
# Uzbekistan_2015
# Uzbekistan_2016
# Yemen_2012


## joining
## elections with no match in btmp are coded as single-round contests without a runoff
## rule (runoff = 0, rounds = 1, r2_needed = 0, reversion = 0); the merge keys are dropped
nelda <- nelda %>%
  left_join(
    select(btmp, cdate, runoff, rounds, r2_needed, reversion),
    by = "cdate"
  ) %>%
  mutate(
    country = factor(country),
    runoff = ifelse(is.na(runoff), 0, runoff),
    rounds = ifelse(is.na(rounds), 1, rounds),
    r2_needed = ifelse(is.na(r2_needed), 0, r2_needed),
    reversion = ifelse(is.na(reversion), 0, reversion),
    cyear = NULL,
    cdate = NULL
  )
summary(nelda)


## creating the yearly dataset
## One row per calendar year: number of elections with v2x_polyarchy > 1/3 (n), of those
## held under a runoff rule, of those that needed a second round, and of those in which the
## second round reverted the first-round order. The counts are then summed over a trailing
## 5-year window and turned into percentages.
nelda_year <- nelda %>% 
  filter (v2x_polyarchy > 1/3) %>% 
  group_by (year) %>% summarise (
    n = n ()
    , runoff_rule = sum (runoff, na.rm=TRUE)
    , need_r2 = sum (runoff*r2_needed, na.rm=TRUE)
    , reverted = sum (runoff*reversion, na.rm=TRUE)
    ) %>% 
  ungroup () %>% 
  
  ## roll_sum() below adds up 5 consecutive ROWS. A year without any qualifying election
  ## has no row after summarise(), which would silently stretch the window beyond 5
  ## calendar years. We therefore add such years explicitly, with zero counts (this is a
  ## no-op when every year is already present)
  complete (year = full_seq (year[!is.na (year)], period=1)
            , fill = list (n=0L, runoff_rule=0, need_r2=0, reverted=0)) %>% 
  arrange (year) %>% 
  mutate (
    
    ## summing N events during the previous 5 years
    ## (align="right": the window ends in the current year; the first 4 years are NA)
    n = roll_sum (n, n=5, align="right", fill = NA)
    , runoff_rule = roll_sum (runoff_rule, n=5, align="right", fill = NA)
    , need_r2 = roll_sum (need_r2, n=5, align="right", fill = NA)
    , reverted = roll_sum (reverted, n=5, align="right", fill = NA)
    
    ## calculating the shares of interest
    ## (each share is conditional on the previous quantity; 0/0 gives NaN)
    , runoff_rule_sh = runoff_rule / n * 100
    , need_r2_sh = need_r2 / runoff_rule * 100
    , reverted_sh = reverted / need_r2 * 100
    )
summary (nelda_year)


## creating the country dataset
## per-country counts over the whole period, and the corresponding conditional shares
## NOTE(review): unlike the yearly dataset, no v2x_polyarchy filter is applied here -- confirm this is intended
nelda_country <- nelda %>%
  group_by(country) %>%
  summarise(
    n = n(),
    runoff_rule = sum(runoff, na.rm = TRUE),
    need_r2 = sum(runoff * r2_needed, na.rm = TRUE),
    reverted = sum(runoff * reversion, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    runoff_rule_sh = runoff_rule / n * 100,
    need_r2_sh = need_r2 / runoff_rule * 100,
    reverted_sh = reverted / need_r2 * 100
  )
summary(nelda_country)



### (1.2.2) Elections with runoff rule ####
## (each row is a different election) ####

## merging with NELDA data -> we only want to keep minimally competitive elections
## compet_nelda is only non-missing for elections found in NELDA, so "World" rows without
## a match are dropped; the subnational samples are kept as they are
bfull <- bfull %>%
  left_join(
    nelda %>% select(country, date, compet_nelda),
    by = c("country", "date_r1" = "date")
  ) %>%
  filter(sample != "World" | !is.na(compet_nelda)) %>%
  # excluding presidential elections in which the most voted party surpassed 50% of the vote, but there was a re-run b/c of low turnout
  filter(
    !(sample == "World" & country == "Bulgaria" & year_r1 == 2006),
    !(sample == "World" & country == "North Macedonia" & year_r1 == 2014)
  ) %>%
  select(-compet_nelda)
summary(bfull)
nrow(bfull)


## creating the samples we'll use in the analysis
## every row enters once as "full" and once more for each sub-sample it belongs to; the
## stacked copies are told apart by sample2
bfull_full <- mutate(bfull, sample2 = "full")
bfull_presi <- bfull %>%
  filter(sample == "World") %>%
  mutate(sample2 = "presi")
bfull_gover <- bfull %>%
  filter(sample %in% c("Brazil (governor)", "Argentina", "Bolivia", "Chile")) %>%
  mutate(sample2 = "gover")
bfull_mayor <- bfull %>%
  filter(sample %in% c("Brazil (mayor)", "Mexico")) %>%
  mutate(sample2 = "mayor")
bfull_bra <- bfull %>%
  filter(sample %in% c("Brazil (governor)", "Brazil (mayor)")) %>%
  mutate(sample2 = "brazil")
bfull_oth <- bfull %>%
  filter(sample %in% c("Argentina", "Bolivia", "Chile", "Mexico")) %>%
  mutate(sample2 = "others")

bfull <- bind_rows(
  bfull_full, bfull_presi, bfull_gover, bfull_mayor, bfull_bra, bfull_oth
) %>%
  mutate(
    sample2 = factor(
      sample2,
      levels = c("full", "presi", "gover", "mayor", "brazil", "others")
    )
  )
summary(bfull)
with(bfull, table(sample, sample2)) ## all's well

# discarding the intermediate objects
rm(bfull_full, bfull_presi, bfull_gover, bfull_mayor, bfull_bra, bfull_oth)


## keeping the election-specific data only
## Collapses the candidate-level data to the election level: only columns that are meant to
## be constant within an election are kept, and duplicated rows are then removed.
## Rows with missing v2x_polyarchy are kept; only those with a score of 1/3 or less are dropped.
bele <- bfull %>% 
  filter (is.na (v2x_polyarchy) | (v2x_polyarchy > 1/3)) %>% ## to exclude extremely uncompetitive elections
  select (
    sample, sample2, country, district, office, year_r1, date_r1, year_r2, date_r2, date_dist
    , elSystemExe, rule1, rule2, rule_full, threshold, presi, sp1, sp2, pp1, pp2, v2x_polyarchy
    , rounds, r2_needed, reversion_ele
    , voteTotal_r1, validTotal_r1, voteTotal_r2, validTotal_r2
    , n_cand_r1, enp_r1, enp_r2
    , share_first_r1, share_second_r1, share_third_r1, share_first_r2, share_second_r2
    , margin12, margin23, margin_r2
    , dif_to_50_first, dif_to_50_second, dif_to_thresh_first, dif_to_thresh_second
    , wiki_lr_dist12, wiki_lr_first, wiki_lr_second
    , wiki_lr_imp_dist12, wiki_lr_imp_first, wiki_lr_imp_second
    , ideoLR_fa_dist12, ideoLR_fa_first, ideoLR_fa_second
    , ideoIL_fa_dist12, ideoIL_fa_first, ideoIL_fa_second
    , ideoPM_fa_dist12, ideoPM_fa_first, ideoPM_fa_second
    , incStatus, incCandidate_any, incCandidate_none, incCandidate_first, incCandidate_second
    , expStatus, exp_bra_both, exp_bra_first, exp_bra_second, exp_bra_none
    , expStatus2, exp_bra2_both, exp_bra2_first, exp_bra2_second, exp_bra2_none
    ) %>% 
  unique ()
## an election still has more than one row here whenever one of the selected columns
## varies across its candidates; the next steps deal with that
nrow (bele)

# single-round elections -> NA's for all second-round variables
bele1 <- bele %>%
  filter(rounds == 1) %>%
  unique()
nrow(bele1)

# two-rounds elections -> keep non-NA's for second round variables
bele2 <- bele %>%
  filter(rounds == 2, !is.na(enp_r2)) %>%
  unique()
nrow(bele2)

# putting everything together
nrow(bele1) + nrow(bele2)
bele <- bind_rows(bele1, bele2) %>%
  arrange(sample, country, district, office, year_r1)
summary(bele)
nrow(bele)

# checking there are no repeated cases: the number of distinct elections in bfull ...
bfull %>%
  filter(is.na(v2x_polyarchy) | (v2x_polyarchy > 1/3)) %>%
  select(sample, sample2, country, district, office, date_r1) %>%
  unique() %>%
  nrow()
# ... must equal the number of rows of bele
nrow(bele) ## no discrepancies. Good

# the non-stacked version: each election appears exactly once
bele_main <- bele %>%
  filter(sample2 == "full") %>%
  mutate(sample2 = NULL)
summary(bele_main)
nrow(bele_main)

# getting ntiles for ideological distance(s) between 1st and 2nd placed candidates
## the split is made separately within each (sub)sample; because of desc(), group 1
## holds the larger distances
bele <- bele %>%
  group_by(sample2) %>%
  mutate(
    wiki_lr_dist12_ntile2 = ntile(desc(wiki_lr_dist12), 2), ## ntile2: w.r.t. median
    wiki_lr_imp_dist12_ntile2 = ntile(desc(wiki_lr_imp_dist12), 2),
    ideoLR_dist12_ntile2 = ntile(desc(ideoLR_fa_dist12), 2),
    ideoIL_dist12_ntile2 = ntile(desc(ideoIL_fa_dist12), 2),
    ideoPM_dist12_ntile2 = ntile(desc(ideoPM_fa_dist12), 2)
  ) %>%
  ungroup()
summary(bele)



### (1.2.3) First two candidates in each election | runoff needed ####
## Candidate-level dataset: the two candidates ranked 1st and 2nd in the first round, in
## elections where a second round was needed (and v2x_polyarchy is missing or above 1/3).
bcand <- bfull %>% filter (
  (is.na (v2x_polyarchy) | (v2x_polyarchy > 1/3))
  & r2_needed == 1 & rank_r1 <= 2) %>% 
  mutate (
    ele_id = str_c (sample, country, district, date_r1, sep="_") ## election identifier
    , ele_id = factor (ele_id)
    , winner = winner*100 ## rescaled from 0/1 to 0/100
    , score = ifelse (rank_r1==1, margin12, -margin12) ## signed margin: positive for the first-placed candidate, negative for the second-placed
    , elyear = ifelse (
      sample %in% c ("Brazil (governor)", "Brazil (mayor)", "Mexico")
      , str_c (country, year_r1, sep="_"), NA) ## country-year identifier, defined for these three samples only
    , elyear = factor (elyear)
    , sample3 = case_when (
      sample == "Brazil (governor)" ~ "Brazil_governor"
      , sample == "Brazil (mayor)" ~ "Brazil_mayor"
      , TRUE ~ as.character (sample)) ## sample names without spaces or parentheses
    , abs_maj = ifelse (elSystemExe == "absolute majority", 1, 0)
    , presi_dum = ifelse (presi=="presidential", 1, 0)
    ## indicators of missing ideology scores
    , wiki_lr_na = ifelse (is.na (wiki_lr), 1, 0)
    , wiki_lr_imp_na = ifelse (is.na (wiki_lr_imp), 1, 0)
    , ideoLR_fa_na = ifelse (is.na (ideoLR_fa), 1, 0)
    , ideoIL_fa_na = ifelse (is.na (ideoIL_fa), 1, 0)
    , ideoPM_fa_na = ifelse (is.na (ideoPM_fa), 1, 0)
    , r2_needed = NULL ## constant (== 1) after the filter above
    , third_r1 = NULL)

# adding data on ideological distance
## the median-split indicators live in the election-level data; they are matched on the
## election identifiers plus sample2 (the stacked sub-sample)
bcand <- bcand %>%
  left_join(
    bele %>%
      select(
        sample, sample2, country, office, district, date_r1,
        wiki_lr_dist12_ntile2:ideoPM_dist12_ntile2
      ),
    by = c("sample", "sample2", "country", "office", "district", "date_r1")
  ) %>%
  ungroup()
with(bcand, table(elyear, sample, useNA = "always"))
table(bcand$sample3)

## the non-stacked version
bcand_main <- bcand %>%
  filter(sample2 == "full") %>%
  mutate(sample2 = NULL)
summary(bcand_main)
nrow(bcand_main)



#### updating WD to export the figures there
setwd(str_c(home, "paper/"))




#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
#### // 2 // DESCRIPTIVE STATISTICS ####
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#

#### (2.1) Figure 1: Runoff elections around the world, 1949-2020  ####

### (2.1.1) Figure 1b: Evolution over time ####

# preparing the data
## Long version of nelda_year with one row per year x quantity, holding the share to plot
## (value) and the count it is conditional on (size).
## Trick: each share is first glued to its denominator as the string "share_denominator",
## so that a single pivot_longer() carries both; the string is split again afterwards.
nelda_year_long <- nelda_year %>% mutate (
    runoff_rule_sh2 = str_c (runoff_rule_sh, n, sep='_')
    , need_r2_sh2 = str_c (need_r2_sh, runoff_rule, sep='_')
    , reverted_sh2 = str_c (reverted_sh, need_r2, sep='_') ) %>% 
  pivot_longer (cols=c ("reverted_sh2", "need_r2_sh2", "runoff_rule_sh2")
                , names_to="quantity") %>% 
  mutate (
    ## size must be extracted before value is overwritten: both read the packed string in `value`
    size = as.numeric (str_split_fixed (as.character (value), pattern="_", n=2)[,2])
    , value = as.numeric (str_split_fixed (as.character (value), pattern="_", n=2)[,1])
    ## labels shown in the legend; the factor levels fix the order of the series
    , quantity = case_when (
      quantity == "runoff_rule_sh2" ~ "runoff rule\nemployed"
      , quantity == "need_r2_sh2" ~ "runoff required\n | runoff employed"
      , quantity == "reverted_sh2" ~ "2nd round reversion\n | runoff required")
    , quantity = factor (quantity, levels=c (
      "runoff rule\nemployed"
      , "runoff required\n | runoff employed"
      , "2nd round reversion\n | runoff required")))

# drawing the plot
## one point every 5 years; the size of each point reflects the count the share is
## conditional on (the size legend is suppressed)
(pRunoffWorld <- ggplot(
  data = filter(nelda_year_long, year %in% seq(1950, 2020, by = 5)),
  mapping = aes(x = year, y = value, color = quantity)
) +
  geom_line() +
  geom_point(aes(size = size / 100), alpha = alpha_dots) +
  scale_x_continuous(
    name = "", limits = c(1949, 2020),
    breaks = seq(1950, 2020, by = 5), labels = seq(1950, 2020, by = 5)
  ) +
  scale_y_continuous(
    name = "%", limits = c(0, 100),
    breaks = seq(0, 100, by = 20), labels = seq(0, 100, by = 20)
  ) +
  scale_color_manual(name = "", values = c(col_movie[1], col_movie[4], col_movie[5])) +
  scale_size(guide = "none") +
  theme(
    text = element_text(size = size_text * 1.75),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-21, -9, -9, -9)
  ))



### (2.1.2) Figure 1a: Average by country ####

## getting the map and checking country names
wmap <- maps::map("world", plot = FALSE, fill = TRUE, wrap = c(-180, 180, NA)) %>% ## the NA is to exclude Antarctica
  st_as_sf()
summary(wmap)

# checking country names
summary(sort(unique(nelda_country$country)) %in% sort(unique(wmap$ID))) ## 4 discrepancies only
sort(unique(nelda_country$country))[!(sort(unique(nelda_country$country)) %in% sort(unique(wmap$ID)))]

# renaming the map's countries so that they match the names used in nelda_country
wmap <- wmap %>%
  mutate(
    ID = as.character(ID),
    ID = case_when(
      ID == "Republic of Congo" ~ "Republic of the Congo",
      ID == "Vietnam" ~ "Republic of Vietnam",
      ID == "Gambia" ~ "The Gambia",
      ID == "USA" ~ "United States of America",
      TRUE ~ ID
    )
  )
summary(sort(unique(nelda_country$country)) %in% sort(unique(wmap$ID))) ## no more discrepancies

# joining (polygons without a match in nelda_country get NA's)
wmap <- wmap %>%
  left_join(nelda_country, by = c("ID" = "country")) %>%
  mutate(ID = factor(ID))
summary(wmap)

# drawing the map
## countries are filled by the number of elections that needed a second round; the fill
## scale is limited to 0-10
(pRunoffMap <- ggplot(data = wmap) +
  geom_sf(aes(fill = need_r2), color = "white", size = size_borders) +
  scale_fill_viridis(
    name = "elections decided\nby runoff (#)",
    option = col_viridis, direction = -1,
    limits = c(0, 10), breaks = seq(0, 10, by = 2),
    labels = seq(0, 10, by = 2)
  ) +
  theme_map(base_size = size_text * 1.5) +
  theme(
    text = element_text(size = size_text * 1.5),
    legend.key.size = unit(size_legend, "cm")
  ))




### (2.1.3) Exporting ####
## common width and height (in cm) of both panels
pwid <- 12 * 0.85
phei <- 9 * 0.85
ggsave(
  "figures/figRunoffWorld.png",
  plot = pRunoffWorld, width = pwid, height = phei, units = "cm", dpi = 600
)
ggsave(
  "figures/figRunoffMap.png",
  plot = pRunoffMap, width = pwid, height = phei, units = "cm", dpi = 600
)



#### (2.2) Table 1: Samples included in the analysis ####

# getting the values
## Builds Table 1 column by column: every argument of bind_cols() is an unnamed vector with
## one element per level of `sample`, so the 9 columns are identified by position only
## (the code that adds the totals row relies on columns 4, 5, 6 and 8).
tabSamples <- bind_cols (
  
  # sample name
  with (bele_main, levels (sample))
  
  # office
  # (assumes a single office per sample; otherwise unique() returns more than one value per sample -- TODO confirm)
  , c (with (bele_main, by (office, sample, function (x) as.character (unique (x)))))
  
  # period covered
  , str_c (
    with (bele_main, by (year_r1, sample, min_na))
    , with (bele_main, by (year_r1, sample, max_na))
    , sep="-")
  
  # number of districts
  , c (with (bele_main, by (district, sample, n_distinct))[])
  
  # elections with runoff
  # (every row of bele_main is an election held under a runoff rule, so this is a row count)
  , c (with (bele_main, table (sample)))
  
  # N of runoffs needed
  , c (with (bele_main, by (r2_needed, sample, sum))[])
  
  # % runoffs
  , sprintf ("%.1f", with (bele_main, by (r2_needed, sample, mean))*100)
  
  # N of reversions
  , c (with (bele_main, by (reversion_ele, sample, sum))[])
  
  # % reversions | runoff needed
  , sprintf ("%.1f", with (filter (bele_main, r2_needed==1), by (reversion_ele, sample, mean))*100) )

# exporting to LaTeX
## appending the "Full sample" row: totals of the count columns (4, 5, 6 and 8) and the two
## percentages recomputed from those totals; all sums are taken before the row is added
(tabSamples <- rbind(
  tabSamples,
  c(
    "\\midrule Full sample", "", "",
    sum(tabSamples[, 4]),
    sum(tabSamples[, 5]),
    sum(tabSamples[, 6]),
    sprintf("%.1f", sum(tabSamples[, 6]) / sum(tabSamples[, 5]) * 100),
    sum(tabSamples[, 8]),
    sprintf("%.1f", sum(tabSamples[, 8]) / sum(tabSamples[, 6]) * 100)
  )
))

## LaTeX pieces for Table 1: three stacked rows of column titles (Header1-Header3) and the
## note placed below the table (Bottom1). Remember that "\\" inside an R string is a
## single backslash in the .tex file.
Header1 <- paste ("\\toprule \\multicolumn{1}{c}{} & \\multicolumn{1}{c}{} & \\multicolumn{1}{c}{} & \\multicolumn{1}{c}{number} & \\multicolumn{1}{c}{runoff} & \\multicolumn{1}{c}{second} & \\multicolumn{1}{c}{\\%} & \\multicolumn{1}{c}{number} & \\multicolumn{1}{c}{} \\\\ \n")
Header2 <- paste ("\\multicolumn{1}{c}{} & \\multicolumn{1}{c}{} & \\multicolumn{1}{c}{period} & \\multicolumn{1}{c}{of} & \\multicolumn{1}{c}{rule} & \\multicolumn{1}{c}{round} & \\multicolumn{1}{c}{second} & \\multicolumn{1}{c}{of} & \\multicolumn{1}{c}{\\%} \\\\ \n")
Header3 <- paste ("\\multicolumn{1}{c}{sample} & \\multicolumn{1}{c}{office} & \\multicolumn{1}{c}{covered} & \\multicolumn{1}{c}{districts} & \\multicolumn{1}{c}{employed} & \\multicolumn{1}{c}{needed} & \\multicolumn{1}{c}{round} & \\multicolumn{1}{c}{reversions} & \\multicolumn{1}{c}{reversions} \\\\ \\midrule \n")
## the minipage opens with "~\\\\", i.e. the LaTeX line break "~\\", exactly as in the note
## of Table A1. It used to read "~\\", which reaches LaTeX as a backslash followed by a
## newline (a control space) instead of a line break
Bottom1 <- str_c ("\\bottomrule \\multicolumn{9}{l}{
  \\begin{minipage}{13.95cm}~\\\\
  \\footnotesize Except for the Brazil (governor), Chile and Mexico samples, the ``runoff rule employed'' column is not a multiple of the ``number of districts'' column because different districts held elections every 4, 5 or 6 years (World), they introduced a runoff rule at different moments in time (World, Argentina, Bolivia), or reached 200,000 registered voters in different election years (mayoral elections in Brazil).
  \\end{minipage}}\\\\")

## the three header rows go before the first table row (position 0) and the note after
## the last one (row 8); positions and commands are matched by order
addtorow <- list(
  pos = list(0, 0, 0, 8),
  command = c(Header1, Header2, Header3, Bottom1)
)
print(
  xtable(
    as.matrix(tabSamples),
    align = c("l", "l", "r", "r", "r", "r", "r", "r", "r", "r"),
    digits = 2,
    caption = "{Samples included in the analysis}",
    label = "T:samples"
  ),
  sanitize.text.function = function(x) {
    x
  }, ## the cells already contain LaTeX, so nothing is escaped
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = c(), ## all rules come from the header / bottom commands
  add.to.row = addtorow,
  file = "tables/tabSamples.tex"
)



#### (2.3) Tables with descriptive statistics ####

### (2.3.1) Table A1: Election-level data ####

## we first select the variables to report and use pivot_longer() to create a "long" table
## Descriptive statistics (N, mean, sd, min, max) of the election-level variables, for each
## (sub)sample in sample2, restricted to elections in which a second round was needed.
## The result has one row per sample2 x variable and the columns
## sample2, loc, variable, N, mean, sd, min, max, varname -- the export step further down
## addresses them by position (N = 4, mean:max = 5:8, varname = 9).
tabDescEle <- bele %>% filter (r2_needed == 1) %>% 
  select (sample2
          , margin12, margin23, margin_r2, reversion_ele
          , n_cand_r1, enp_r1
          , wiki_lr_dist12
          , ideoLR_fa_dist12, ideoIL_fa_dist12, ideoPM_fa_dist12
          , incCandidate_first, incCandidate_second, exp_bra_first, exp_bra_second
          , date_dist) %>% 
  pivot_longer (
    cols = margin12:date_dist, names_to = "variable") %>% 
  mutate (is_na = ifelse (is.na (value), 1, 0)) %>% 
  mutate (
    loc = ifelse (sample2 %in% c (
      "full", "gover", "brazil"), "left", "right")) %>% ## panel where we'll put the observations
  group_by (sample2, loc, variable, .drop=FALSE) %>% 
  summarise (
    sum_nas = sum (is_na)
    , N = n() - sum_nas ## number of non-missing observations
    , mean = mean (value, na.rm=TRUE)
    , sd = sd (value, na.rm=TRUE)
    , min = min_na (value)
    , max = max_na (value)) %>% ungroup () %>% 
  mutate (
    sum_nas = NULL
    ## variables without any valid observation in a (sub)sample get NA for N and mean, so
    ## that the corresponding cells can be left blank in the table
    , N = ifelse (N==0 | is.na (mean), NA, N)
    , mean = ifelse (N==0 | is.na (N), NA, mean)
    ## the factor levels fix the order of the rows within each panel
    , variable = factor (variable, levels=c (
      "margin12", "margin23", "margin_r2", "reversion_ele", "n_cand_r1", "enp_r1"
      , "wiki_lr_dist12"
      , "ideoLR_fa_dist12", "ideoIL_fa_dist12", "ideoPM_fa_dist12"
      , "incCandidate_first", "incCandidate_second", "exp_bra_first", "exp_bra_second"
      , "date_dist"))
    ## LaTeX label of each variable
    , varname = as.character (variable)
    , varname = case_when (
      
      ## election characteristics
      varname == "margin12" ~ "\\emph{margin (1vs2)} (0:50)"
      , varname == "margin23" ~ "\\emph{margin (2vs3)} (0:25)"
      , varname == "margin_r2" ~ "\\emph{margin (1vs2)} (\\textsc{r}2) (0:100)"
      , varname == "reversion_ele" ~ "\\emph{reversion in second round} (0/1)"
      , varname == "n_cand_r1" ~ "\\emph{number of candidates} (\\#)"
      , varname == "enp_r1" ~ "\\emph{effective number of candidates}"
      
      ## ideology distances
      ,  varname == "wiki_lr_dist12" ~ "$|$\\emph{ideol. distance (1vs2)}$|$ (Left-Right, Wikipedia)"
      ,  varname == "ideoLR_fa_dist12" ~ "$|$\\emph{ideol. distance (1vs2)}$|$ (Left-Right, \\textsc{v}-\\textsc{p}arty)"
      ,  varname == "ideoIL_fa_dist12" ~ "$|$\\emph{ideol. distance (1vs2)}$|$ ((Il)Liberalism)"
      ,  varname == "ideoPM_fa_dist12" ~ "$|$\\emph{ideol. distance (1vs2)}$|$ (Post-Materialism)"
      ## incumbency and experience
      ,  varname == "incCandidate_first" ~ "\\emph{incumbent first-placed} (0/1)"
      ,  varname == "incCandidate_second" ~ "\\emph{incumbent second-placed} (0/1)"
      ,  varname == "exp_bra_first" ~ "\\emph{first-placed is experienced} (0/1)"
      ,  varname == "exp_bra_second" ~ "\\emph{second-placed is experienced} (0/1)"
      
      ## other characteristics
      , varname == "date_dist" ~ "\\emph{distance b/w first and second round} (days)"
      )) %>% arrange (sample2, variable)

# exporting to LaTeX
## Re-arranges the long table into the printed layout: variable label, then N / mean / sd /
## min / max of the "left" samples, an empty separator column, and the same five statistics
## for the "right" samples. Columns are picked by position (9 = varname, 4 = N,
## 5:8 = mean, sd, min, max) and everything is converted to text.
tabDescEle <- bind_cols (
  as.matrix (tabDescEle[tabDescEle$loc=="left",9])
  , format (round (as.matrix (tabDescEle[tabDescEle$loc=="left",4]), 0))
  , format (round (as.matrix (tabDescEle[tabDescEle$loc=="left",5:8]), 2))
  , rep ("", nrow (tabDescEle[tabDescEle$loc=="left",]))
  , format (round (as.matrix (tabDescEle[tabDescEle$loc=="right",4]), 0))
  , format (round (as.matrix (tabDescEle[tabDescEle$loc=="right",5:8]), 2)) )

## blanking the missing cells. format() pads "NA" to the width of the widest number of each
## block, so the padding is stripped before comparing instead of matching hard-coded padded
## strings (which only work for one particular column width)
tabDescEle[trimws (as.matrix (tabDescEle)) == "NA"] <- ""
tabDescEle

## LaTeX pieces for Table A1: the row of statistic names (Header1), one title row per pair
## of panels (Header2-Header4) and the note placed below the table (Bottom1)
Header1 <- " & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} & & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} \\\\ \\midrule \n"
Header2 <- "\\toprule & \\multicolumn{5}{c}{(a) Full sample} & & \\multicolumn{5}{c}{(b) Presidential elections} \\\\ \\cmidrule{2-6} \\cmidrule{8-12} \n"
Header3 <- "[1.5ex] & \\multicolumn{5}{c}{(c) Gubernatorial elections} & & \\multicolumn{5}{c}{(d) Mayoral elections} \\\\ \\cmidrule{2-6} \\cmidrule{8-12} \n"
Header4 <- "[1.5ex] & \\multicolumn{5}{c}{(e) Subnational  (Brazil)} & & \\multicolumn{5}{c}{(f) Subnational (outside Brazil)} \\\\ \\cmidrule{2-6} \\cmidrule{8-12} \n"
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.0cm}~\\\\
  \\footnotesize Only elections in which a second round was needed to determine the winner are included.
  Unless specifically noted, all variables are measured in the first round or have a common value for both rounds.
  \\end{minipage}}\\\\"

## each pair of panels takes 15 rows (one per variable): the first panel title and the
## statistic names go on top, the other titles after rows 15 and 30, the note after row 45
addtorow <- list(
  pos = list(0, 0, 15, 30, 45),
  command = c(Header2, Header1, Header3, Header4, Bottom1)
)
print(
  xtable(
    as.matrix(tabDescEle),
    align = c("l", "l", "r", "r", "r", "r", "r", "r", "r", "r", "r", "r", "r"),
    digits = 2,
    caption = "{Descriptive statistics (\\textsc{i}): Election-level characteristics}",
    label = "T:descEle"
  ),
  sanitize.text.function = function(x) {
    x
  }, ## the cells already contain LaTeX, so nothing is escaped
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = c(), ## all rules come from the header / bottom commands
  add.to.row = addtorow,
  file = "tables/tabDescElection.tex"
)



### (2.3.2) Table A2: Candidate-level data ####

## we first select the variables to report and use pivot_longer() to create a "long" table

## LaTeX row labels keyed by variable name; the order of the names is also the
## row order used within each sample
labsDescCand <- c (
  ## election outcomes
  winner = "\\emph{winner} (0/100)"
  , share_r1 = "\\emph{vote share} (0:50)"
  , share_r2 = "\\emph{vote share} (\\textsc{r}2) (0:100)"
  ## ideology scores
  , wiki_lr = "\\emph{ideology} (Left-Right, Wikipedia)"
  , ideoLR_fa = "\\emph{ideology} (Left-Right, \\textsc{v}-\\textsc{p}arty)"
  , ideoIL_fa = "\\emph{ideology} ((Il)Liberalism)"
  , ideoPM_fa = "\\emph{ideology} (Post-Materialism)")

## N / mean / sd / min / max of every variable, by sample and first-round rank,
## then widened so that rank 1 and rank 2 sit side by side
tabDescCand <- bcand %>% 
  select (sample2, rank_r1, winner, share_r1, share_r2
          , wiki_lr, ideoLR_fa, ideoIL_fa, ideoPM_fa) %>% 
  pivot_longer (cols = winner:ideoPM_fa, names_to = "variable") %>% 
  filter (!is.na (value)) %>% 
  group_by (sample2, rank_r1, variable) %>% 
  summarise (N = n(), mean = mean (value), sd = sd (value)
             , min = min (value), max = max (value)) %>% 
  ungroup () %>% 
  pivot_wider (names_from = "rank_r1", values_from = N:max) %>% 
  relocate (N_2, mean_2, sd_2, min_2, max_2, .after = max_1) %>% 
  mutate (
    variable = factor (variable, levels = names (labsDescCand))
    , varname = unname (labsDescCand[as.character (variable)]) ) %>% 
  arrange (sample2, variable)

# exporting to LaTeX
## Columns are picked by position: 13 = row label, 3 = N and 4:7 = statistics for the
## top-placed candidate, 8 = N and 9:12 = statistics for the runner-up; an empty
## spacer column separates the two panels.
tabDescCand <- local ({
  rowLabel <- as.matrix (tabDescCand[,13])
  spacer <- rep ("", nrow (tabDescCand))
  nFirst <- format (round (as.matrix (tabDescCand[,3]), 0))
  statsFirst <- format (round (as.matrix (tabDescCand[,4:7]), digits))
  nSecond <- format (round (as.matrix (tabDescCand[,8]), 0))
  statsSecond <- format (round (as.matrix (tabDescCand[,9:12]), digits))
  bind_cols (rowLabel, nFirst, statsFirst, spacer, nSecond, statsSecond)
})
tabDescCand

## LaTeX fragments injected into the table via xtable's add.to.row:
## Header1 = top rule plus the two panel titles (top placed / runner-up)
## Header2 = title of sample (a) together with the column titles (N, mean, sd, min, max)
## Header3-Header7 = full-width title rows for samples (b) to (f)
## Bottom1 = bottom rule plus the table note
Header1 <- paste ("\\toprule & \\multicolumn{5}{c}{top placed (first round)} & & \\multicolumn{5}{c}{runner-up (first round)} \\\\ \\cmidrule{2-6} \\cmidrule{8-12} \n")
Header2 <- paste ("\\multicolumn{1}{l}{(a) Full sample} & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} & & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} \\\\ \\midrule \n")
Header3 <- paste ("[0.75ex] \\multicolumn{12}{l}{(b) Presidential elections} \\\\ \\midrule \n")
Header4 <- paste ("[0.75ex] \\multicolumn{12}{l}{(c) Gubernatorial elections} \\\\ \\midrule \n")
Header5 <- paste ("[0.75ex] \\multicolumn{12}{l}{(d) Mayoral elections} \\\\ \\midrule \n")
Header6 <- paste ("[0.75ex] \\multicolumn{12}{l}{(e) Subnational (Brazil)} \\\\ \\midrule \n")
Header7 <- paste ("[0.75ex] \\multicolumn{12}{l}{(f) Subnational (outside Brazil)} \\\\ \\midrule \n")
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.75cm}~\\\\
  \\footnotesize Only elections in which a second round was needed to determine the winner are included.
  Unless specifically noted, all variables are measured in the first round or have a common value for both rounds.
  \\end{minipage}}\\\\")

## add.to.row pairs pos[[k]] with command[k]: command k is written after table row pos[[k]]
## (0 = before the first row). Positions advance in steps of 7, i.e. one sample title
## every seven rows, with the note after row 42.
## NOTE(review): this assumes every sample contributes exactly seven rows (one per
## reported variable) -- confirm if a variable has no non-missing values in some sample.
addtorow <- list()
addtorow$pos <- list()
addtorow$pos[[1]] <- 0
addtorow$pos[[2]] <- 0
addtorow$pos[[3]] <- 7
addtorow$pos[[4]] <- 14
addtorow$pos[[5]] <- 21
addtorow$pos[[6]] <- 28
addtorow$pos[[7]] <- 35
addtorow$pos[[8]] <- 42
addtorow$command <- c (Header1, Header2, Header3, Header4, Header5, Header6, Header7
                       , Bottom1)
## sanitize.text.function is the identity so the LaTeX markup in the cells is written verbatim;
## column/row names and default \hline's are suppressed because the headers above replace them
print (xtable ( as.matrix (tabDescCand)
                , align=c("l","l","r","r","r","r","r","r","r","r","r","r","r")
                , digits=2
                , caption="{Descriptive statistics (\\textsc{ii}): First- and Second-placed candidates}"
                , label="T:descCand")
       , sanitize.text.function=function(x){x}
       , floating=TRUE
       , table.placement="t"
       , caption.placement="top" 
       , latex.environments="center"
       , size="footnotesize"
       , include.colnames=FALSE
       , include.rownames=FALSE
       , hline.after = c ()
       , add.to.row=addtorow  
       , file="tables/tabDescCandidate.tex"
       )




#### (2.4) Descriptives per sample (I): Votes ####

### (2.4.1) Figure A1a: Vote shares 1st, 2nd and 3rd ####

# converting the data
## sample codes (as stored in sample2) mapped to facet titles, in display order
labsSample <- c (
  full = "full sample"
  , presi = "presidential elections"
  , gover = "gubernatorial elections"
  , mayor = "mayoral elections"
  , brazil = "subnational (Brazil)"
  , others = "subnational (outside Brazil)")
## pivoted column names mapped to legend entries
labsShare <- c (
  share_first_r1 = "first"
  , share_second_r1 = "second"
  , share_third_r1 = "third")

## one row per election and vote share, restricted to elections that needed a runoff
bele_long <- bele %>% 
  filter (r2_needed == 1) %>% 
  pivot_longer (cols=share_first_r1:share_third_r1, names_to="quantity") %>% 
  mutate (
    quantity = unname (labsShare[quantity])
    , sample = factor (sample2, levels=names (labsSample), labels=unname (labsSample)) )

# drawing the plot(s)
## overlaid densities of the three first-round vote shares, one facet per sample
themeDescShares <- theme (
  strip.background=element_rect (fill=col_movie[3])
  , legend.position="bottom"
  , legend.title=element_blank ()
  , legend.text=element_text (size=size_text*2.25)
  , legend.box.margin=margin (-0,-9,-9,-9))
(pDescShares <- ggplot (data=bele_long, mapping=aes (x=value, fill=quantity))
  + geom_density (alpha=alpha_dots*1.25)
  + scale_x_continuous (
    name="% votes (first round)"
    , limits=c (0, 50)
    , breaks=seq (0, 50, by=10)
    , labels=seq (0, 50, by=10))
  + scale_y_continuous (limits=c (0, 0.08), breaks=seq (0, 0.08, by=0.02))
  + scale_fill_manual (name="", values=c (col_movie[1], col_movie[5], col_movie[4]))
  + facet_wrap (~ sample, ncol=1)
  + themeDescShares )



### (2.4.2) Figure A1b: Margins 1st vs 2nd and 2nd vs 3rd ####

# converting the data
## sample codes (as stored in sample2) mapped to facet titles, in display order
labsSample <- c (
  full = "full sample"
  , presi = "presidential elections"
  , gover = "gubernatorial elections"
  , mayor = "mayoral elections"
  , brazil = "subnational (Brazil)"
  , others = "subnational (outside Brazil)")
## pivoted column names mapped to legend entries
labsMargin <- c (
  margin12 = "first vs. second"
  , margin23 = "second vs. third")

## one row per election and margin, restricted to elections that needed a runoff
bele_long <- bele %>% 
  filter (r2_needed == 1) %>% 
  pivot_longer (cols=margin12:margin23, names_to="quantity") %>% 
  mutate (
    quantity = unname (labsMargin[quantity])
    , sample = factor (sample2, levels=names (labsSample), labels=unname (labsSample)) )


# drawing the plot(s)
## overlaid densities of the two first-round margins, one facet per sample
themeDescMargins <- theme (
  strip.background=element_rect (fill=col_movie[3])
  , legend.position="bottom"
  , legend.title=element_blank ()
  , legend.text=element_text (size=size_text*2.25)
  , legend.box.margin=margin (-0,-9,-9,-9))
(pDescMargins <- ggplot (data=bele_long, mapping=aes (x=value, fill=quantity))
  + geom_density (alpha=alpha_dots)
  + scale_x_continuous (
    name="% margin (first round)"
    , limits=c (0, 50)
    , breaks=seq (0, 50, by=10)
    , labels=seq (0, 50, by=10))
  + scale_y_continuous (limits=c (0, 0.08), breaks=seq (0, 0.08, by=0.02))
  + scale_fill_manual (name="", values=c (col_movie[1], col_movie[5]))
  + facet_wrap (~ sample, ncol=1)
  + themeDescMargins )



### (2.4.3) Figure A2: Scatter 1st vs 2nd + reversions ####

# getting sample names
## sample codes (as stored in sample2) mapped to facet titles, in display order;
## sample2b is kept on bele because later sections facet on it as well
labsSample <- c (
  full = "full sample"
  , presi = "presidential elections"
  , gover = "gubernatorial elections"
  , mayor = "mayoral elections"
  , brazil = "subnational (Brazil)"
  , others = "subnational (outside Brazil)")
bele <- bele %>% mutate (
  sample2b = factor (sample2, levels=names (labsSample), labels=unname (labsSample)) )
  
# drawing the plot(s)
## first-placed vs. runner-up vote share in the first round, coloured by whether the
## runoff reversed the order; dashed guides mark the 50% line and the 45-degree line
themeShares1vs2 <- theme (
  strip.background=element_rect (fill=col_movie[3])
  , legend.position="bottom"
  , legend.text=element_text (size=size_text*2.25)
  , legend.box.margin=margin (-9,-9,-9,-9))
(pDescShares1vs2 <- ggplot (
  data=bele
  , mapping=aes (x=share_first_r1, y=share_second_r1, color=factor (reversion_ele)))
  + geom_vline (xintercept=50, linetype=2, color=gray_lines)
  + geom_abline (linetype=2, color=gray_lines)
  + geom_point (alpha=alpha_dots*1.5, size=size_bin)
  + scale_x_continuous (
    name="vote first round: first-placed candidate (%)"
    , limits=c (0, 100)
    , breaks=seq (0, 100, by=10)
    , labels=seq (0, 100, by=10))
  + scale_y_continuous (
    name="vote first round: runner-up (%)"
    , limits=c (0, 50)
    , breaks=seq (0, 50, by=10)
    , labels=seq (0, 50, by=10))
  + scale_color_manual (
    name="second round reversal"
    , labels=c ("no", "yes")
    , values=c (col_movie[1], col_movie[5]))
  + facet_wrap (~ sample2b, ncol=2)
  + themeShares1vs2 )



### (2.4.4) Exporting ####
## base figure size in cm; the two single-column ("long") figures use it as is,
## the two-column scatter is 1.5x wider and 0.75x as tall
pwid <- 12*0.95
phei <- 39*0.80
ggsave (
  filename="figures/figDescShares.png", plot=pDescShares
  , width=pwid, height=phei, units="cm", dpi=600)
ggsave (
  filename="figures/figDescMargins.png", plot=pDescMargins
  , width=pwid, height=phei, units="cm", dpi=600)
ggsave (
  filename="figures/figDescShares1vs2.png", plot=pDescShares1vs2
  , width=pwid*1.5, height=phei*0.75, units="cm", dpi=600)




#### (2.5) Descriptives per sample (II): Ideology ####

### (2.5.1) Figure A3: (Non-)Missingness % ####

# identifying missing values
## NOTE: despite the "_na" suffix, every flag below is 1 when the score is PRESENT and
## 0 when it is missing; each "_12_na" flag is the product of the two candidate flags,
## i.e. 1 iff both the first- and second-placed candidates have a score
bele <- bele %>% mutate (

  ## Wiki
  wiki_lr_first_na = as.numeric (!is.na (wiki_lr_first))
  , wiki_lr_second_na = as.numeric (!is.na (wiki_lr_second))
  , wiki_lr_12_na = wiki_lr_first_na * wiki_lr_second_na

  ## Wiki (imputed)
  , wiki_lr_imp_first_na = as.numeric (!is.na (wiki_lr_imp_first))
  , wiki_lr_imp_second_na = as.numeric (!is.na (wiki_lr_imp_second))
  , wiki_lr_imp_12_na = wiki_lr_imp_first_na * wiki_lr_imp_second_na

  ## L-R
  , ideoLR_fa_first_na = as.numeric (!is.na (ideoLR_fa_first))
  , ideoLR_fa_second_na = as.numeric (!is.na (ideoLR_fa_second))
  , ideoLR_fa_12_na = ideoLR_fa_first_na * ideoLR_fa_second_na

  ## IL
  , ideoIL_fa_first_na = as.numeric (!is.na (ideoIL_fa_first))
  , ideoIL_fa_second_na = as.numeric (!is.na (ideoIL_fa_second))
  , ideoIL_fa_12_na = ideoIL_fa_first_na * ideoIL_fa_second_na

  ## P-M
  , ideoPM_fa_first_na = as.numeric (!is.na (ideoPM_fa_first))
  , ideoPM_fa_second_na = as.numeric (!is.na (ideoPM_fa_second))
  , ideoPM_fa_12_na = ideoPM_fa_first_na * ideoPM_fa_second_na
  ) %>% ungroup ()
summary (bele)

# there are only minor differences in the V-Party ideology measures:
## cross-tabulating the three V-Party flags shows where their availability differs
with (bele, table (ideoLR_fa_first_na, ideoIL_fa_first_na, ideoPM_fa_first_na)) ## no discrepancies
with (bele, table (ideoLR_fa_second_na, ideoIL_fa_second_na, ideoPM_fa_second_na)) ## just 2 discrepancies b/w LR and IL

# pivoting to create the dataset for the plots
## The flags are selected by name rather than with the positional range
## wiki_lr_first_na:ideoLR_fa_12_na. That range also swept in the wiki_lr_imp_*_na
## flags, which match "wiki" below and were therefore averaged together with the
## raw Wikipedia flags, so the "wiki" bars mixed raw and imputed availability.
## Only the raw Wikipedia score and the V-Party left-right score are plotted.
bele_long <- bele %>% 
  select (sample2, sample2b, r2_needed
          , wiki_lr_first_na, wiki_lr_second_na, wiki_lr_12_na
          , ideoLR_fa_first_na, ideoLR_fa_second_na, ideoLR_fa_12_na) %>% 
  pivot_longer (cols=ends_with ("_na"), names_to="position") %>% 
  mutate (
    ## which ideology source the flag belongs to
    measure = ifelse (grepl ("wiki", position), "wiki", "vparty")
    ## which candidate(s) the flag refers to
    , position = case_when (
      grepl ("first", position) ~ "1st"
      , grepl ("second", position) ~ "2nd"
      , grepl ("12", position) ~ "1st & 2nd")
    , position = factor (position, levels=c (
      "1st", "2nd"#, "3rd"
      , "1st & 2nd"#, "2nd & 3rd", "1st to 3rd"
      ))) %>% 
  group_by (sample2, sample2b, r2_needed, measure, position) %>% 
  ## the flags are 0/1 with 1 = score present, so the mean is the share of non-missing
  summarise (value = mean (value)*100, .groups="drop")
summary (bele_long)

# drawing the plots
## Bar chart of the % of elections with a non-missing ideology score, by candidate
## position and by whether a second round was needed, one facet per sample.
## `measureName` picks the rows of bele_long to plot ("wiki" or "vparty").
plotIdeoMissing <- function (measureName) {
  ( ggplot (
    data=bele_long %>% filter (measure==!!measureName)
    , aes (x=position, y=value, fill=factor (r2_needed)))
    + geom_hline (yintercept=50, linetype=2, color=gray_lines)
    + geom_bar (stat="identity", position="dodge")
    + xlab ("position in first round")
    + ylab ("Ideology: % non-missing")
    + scale_y_continuous (
      name="Ideology: non-missing (%)"
      , limits=c (0, 100)
      , breaks=seq (0, 100, by=20)
      , labels=seq (0, 100, by=20))
    + scale_fill_manual (
      name="second round needed"
      , labels=c ("no", "yes")
      , values=c (col_movie[1], col_movie[5]))
    + facet_wrap (~ sample2b, ncol=1)
    + theme (
      strip.background=element_rect (fill=col_movie[3])
      , legend.position="bottom"
      , legend.text=element_text (size=size_text*1.75)
      , legend.box.margin=margin (-9,-9,-9,-9)) )
}

(pDescIdeoMissingWiki <- plotIdeoMissing ("wiki"))

(pDescIdeoMissingVParty <- plotIdeoMissing ("vparty"))



### (2.5.2) Figure A4: Correlation between scores ####

## computing the correlations
ideol <- c ("wiki_lr", "ideoLR_fa", "ideoIL_fa", "ideoPM_fa")

## sample codes (as stored in sample2) mapped to the row labels of the output
labsCorrel <- c (
  full = "\u03c1 full"
  , presi = "\u03c1 presidential"
  , gover = "\u03c1 governor"
  , mayor = "\u03c1 mayor"
  , brazil = "\u03c1 Brazil"
  , others = "\u03c1 outside Brazil")

## Pairwise correlations between the four ideology scores within one sample,
## returned as six formatted strings. lower.tri() walks the lower triangle of the
## 4x4 matrix column by column (elements 2:4, 7:8 and 12), which is the order
## Wiki-LR, Wiki-IL, Wiki-PM, LR-IL, LR-PM, IL-PM used for the column names below.
## all_of() is required to select columns through an external character vector.
correlRow <- function (sampleCode) {
  scores <- bcand %>% 
    ungroup () %>% 
    filter (sample2 == !!sampleCode) %>% 
    select (all_of (ideol))
  rho <- cor (scores, use="pairwise.complete.obs")
  sprintf ("%.2f", rho[lower.tri (rho)])
}

correls <- bind_cols (
  unname (labsCorrel)
  , do.call (rbind, lapply (names (labsCorrel), correlRow)) )
correls <- as.data.frame (as.matrix (correls))
colnames (correls) <- c ("sample", "Wiki_LR", "Wiki_IL", "Wiki_PM", "LR_IL", "LR_PM", "IL_PM")
correls
#       sample        Wiki_LR Wiki_IL Wiki_PM LR_IL LR_PM IL_PM
# 1           ρ full    0.86   -0.34   -0.79 -0.15 -0.69  0.56
# 2   ρ presidential    0.76   -0.18   -0.45 -0.04 -0.38  0.60
# 3       ρ governor    0.93   -0.41   -0.96  0.00 -0.79  0.38
# 4          ρ mayor    0.94   -0.52   -0.97 -0.50 -0.90  0.53
# 5         ρ Brazil    0.94   -0.54   -0.97 -0.57 -0.89  0.59
# 6 ρ outside Brazil    0.96    0.10   -0.96  0.47 -0.81 -0.23

## drawing the plots
range (bcand %>% select (wiki_lr, ideoLR_fa:ideoPM_fa), na.rm=TRUE) ## -3.5 to 3.5

## Scatter of two ideology scores with reference lines through the origin:
##   - solid black: slope `refSlope` (+1 or -1)
##   - dashed gray: one line per subsample, slope = that subsample's correlation
##   - solid coloured: slope = full-sample correlation
## `slopeCol` names the column of `correls` holding the six correlations
## (row 1 = full sample, rows 2-6 = subsamples). Both axes share the same scale.
## NOTE(review): a correlation equals a regression slope only when both variables
## have the same standard deviation -- the lines are a visual aid, not fitted lines.
plotIdeoCorrel <- function (data, mapping, slopeCol, refSlope, xTitle, yTitle) {
  rho <- as.numeric (correls[[slopeCol]])
  ( ggplot (data, mapping)
    + geom_abline (intercept=0, slope=refSlope, linetype=1)
    + geom_abline (intercept=rep (0, 5), slope=rho[2:6], col=gray_lines, linetype=2)
    + geom_abline (intercept=0, slope=rho[1], col=col_movie[5])
    + geom_point (col=col_movie[2], size=size_bin)
    + scale_x_continuous (name=xTitle, limits=c (-3.5, 3.5)
                          , breaks=seq (-3, 3, by=1), labels=seq (-3, 3, by=1))
    + scale_y_continuous (name=yTitle, limits=c (-3.5, 3.5)
                          , breaks=seq (-3, 3, by=1), labels=seq (-3, 3, by=1)) )
}

## The filters below test the COLUMNS for missingness. The previous version wrote
## !is.na ("wiki_lr"), which tests a string literal, is always TRUE and filtered nothing
## (ggplot then dropped the incomplete rows itself, with a warning).

# Wikipedia and left-right
(pCorrelWiki_LR <- plotIdeoCorrel (
  data=filter (bcand, sample2=="full" & !is.na (wiki_lr) & !is.na (ideoLR_fa))
  , mapping=aes (x=wiki_lr, y=ideoLR_fa)
  , slopeCol="Wiki_LR", refSlope=1
  , xTitle="Left-Right score (Wikipedia)"
  , yTitle="Left-Right score (V-Party)"))
  
# Wikipedia and (il)liberalism
(pCorrelWiki_IL <- plotIdeoCorrel (
  data=filter (bcand, sample2=="full" & !is.na (wiki_lr) & !is.na (ideoIL_fa))
  , mapping=aes (x=wiki_lr, y=ideoIL_fa)
  , slopeCol="Wiki_IL", refSlope=-1
  , xTitle="Left-Right score (Wikipedia)"
  , yTitle="(Il)Liberalism score (V-Party)"))

# Wikipedia and post-materialism
(pCorrelWiki_PM <- plotIdeoCorrel (
  data=filter (bcand, sample2=="full" & !is.na (wiki_lr) & !is.na (ideoPM_fa))
  , mapping=aes (x=wiki_lr, y=ideoPM_fa)
  , slopeCol="Wiki_PM", refSlope=-1
  , xTitle="Left-Right score (Wikipedia)"
  , yTitle="Post-Materialism score (V-Party)"))

# left-right and (il)liberalism
(pCorrelLR_IL <- plotIdeoCorrel (
  data=filter (bcand, sample2=="full" & !is.na (ideoLR_fa) & !is.na (ideoIL_fa))
  , mapping=aes (x=ideoLR_fa, y=ideoIL_fa)
  , slopeCol="LR_IL", refSlope=-1
  , xTitle="Left-Right score (V-Party)"
  , yTitle="(Il)Liberalism score (V-Party)"))

# left-right and post-materialism
(pCorrelLR_PM <- plotIdeoCorrel (
  data=filter (bcand, sample2=="full" & !is.na (ideoLR_fa) & !is.na (ideoPM_fa))
  , mapping=aes (x=ideoLR_fa, y=ideoPM_fa)
  , slopeCol="LR_PM", refSlope=-1
  , xTitle="Left-Right score (V-Party)"
  , yTitle="Post-Materialism score (V-Party)"))

# (il)liberalism and post-materialism
## axis title fixed: it previously read "(Il)Liberalism score score (V-Party)"
(pCorrelIL_PM <- plotIdeoCorrel (
  data=filter (bcand, sample2=="full" & !is.na (ideoIL_fa) & !is.na (ideoPM_fa))
  , mapping=aes (x=ideoIL_fa, y=ideoPM_fa)
  , slopeCol="IL_PM", refSlope=1
  , xTitle="(Il)Liberalism score (V-Party)"
  , yTitle="Post-Materialism score (V-Party)"))


## exporting
## all six correlation scatters are saved as squares of the same size (cm)
pwid <- 10*0.85
phei <- 10*0.85
figsCorrel <- list (
  "figures/figCorrelWiki_LR.png" = pCorrelWiki_LR
  , "figures/figCorrelWiki_IL.png" = pCorrelWiki_IL
  , "figures/figCorrelWiki_PM.png" = pCorrelWiki_PM
  , "figures/figCorrelLR_IL.png" = pCorrelLR_IL
  , "figures/figCorrelLR_PM.png" = pCorrelLR_PM
  , "figures/figCorrelIL_PM.png" = pCorrelIL_PM)
for (figPath in names (figsCorrel)) {
  ggsave (filename=figPath, plot=figsCorrel[[figPath]]
          , width=pwid, height=phei, units="cm")
}




### (2.5.3) Figures A6 and A7: Ideology of 1st and 2nd ####

# preparing the data
## One row per election, ideology measure and candidate (first-placed / runner-up),
## restricted to elections that needed a second round.
## NOTE: the select() ranges are positional, so each *_first / *_second pair must be
## stored in adjacent columns of bele.
bele_long <- bele %>% 
  filter (r2_needed == 1) %>% 
  select (sample2, wiki_lr_first:wiki_lr_second
          , ideoLR_fa_first:ideoLR_fa_second
          , ideoIL_fa_first:ideoIL_fa_second
          , ideoPM_fa_first:ideoPM_fa_second) %>% 
  pivot_longer (cols=wiki_lr_first:ideoPM_fa_second, names_to="quantity") %>% 
  mutate (
    ## ideology measure, read off the column name; "wiki" is tested first and the
    ## upper-case patterns do not match the lower-case "wiki_lr" names
    ideo_measure = case_when (
      grepl ("wiki", quantity) ~ "wiki"
      , grepl ("LR", quantity) ~ "LR"
      , grepl ("IL", quantity) ~ "IL"
      , grepl ("PM", quantity) ~ "PM")
    , ideo_measure = factor (ideo_measure, levels=c (
      "wiki", "LR", "IL", "PM"))
    ## candidate the score belongs to
    , quantity = case_when (
      grepl ("first", quantity) ~ "first"
      , grepl ("second", quantity) ~ "second")
    , quantity = factor (quantity, levels=c (
      "first", "second"))
    ## facet titles, in display order
    , sample = case_when (
      sample2 == "full" ~ "full sample"
      , sample2 == "presi" ~ "presidential elections"
      , sample2 == "gover" ~ "gubernatorial elections"
      , sample2 == "mayor" ~ "mayoral elections"
      , sample2 == "brazil" ~ "subnational (Brazil)"
      , sample2 == "others" ~ "subnational (outside Brazil)")
    , sample = factor (sample, levels = c (
      "full sample", "presidential elections", "gubernatorial elections"
      , "mayoral elections", "subnational (Brazil)", "subnational (outside Brazil)")) )
summary (bele_long)
## TRUE is spelled out: `T` is an ordinary variable that can be reassigned
range (bele_long$value, na.rm=TRUE) ## -3.4 to 3.25

## Density of one ideology score for first- and second-placed candidates, one facet
## per sample. `measureCode` picks the rows of bele_long ("wiki", "LR", "IL" or "PM"),
## `axisTitle` is the x-axis title and `endLabels` holds the two text labels that
## replace the tick labels at -3 and 3.
plotIdeoDensity <- function (measureCode, axisTitle, endLabels) {
  ( ggplot (data=filter (bele_long, ideo_measure==!!measureCode)
            , aes (x=value, fill=quantity))
    + geom_density (alpha=alpha_dots)
    + scale_x_continuous (
      name=axisTitle
      , limits=c (-3.4, 3.25)
      , breaks=c (-3, seq (-2, 2, by=1), 3)
      , labels=c (endLabels[1], seq (-2, 2, by=1), endLabels[2]))
    + scale_fill_manual (name="", values=c (col_movie[1], col_movie[5]))
    + facet_wrap (~ sample, ncol=1)
    + theme (
      strip.background=element_rect (fill=col_movie[3])
      , legend.position="bottom"
      , legend.title=element_blank ()
      , legend.text=element_text (size=size_text*2.25)
      , legend.box.margin=margin (-0,-9,-9,-9)) )
}

# Figure A6a: ideology: Wikipedia
(pDescIdeoWiki <- plotIdeoDensity (
  "wiki", "Left-Right score (Wikipedia)", c ("left", "right")))

# Figure A6b: ideology: LR
(pDescIdeoLR <- plotIdeoDensity (
  "LR", "Left-Right score (V-Party)", c ("left", "right")))

# Figure A7a: ideology: IL
(pDescIdeoIL <- plotIdeoDensity (
  "IL", "(Il)Liberalism score (V-Party)", c ("very\nilliberal", "very\nliberal")))

# Figure A7b: ideology: PM
(pDescIdeoPM <- plotIdeoDensity (
  "PM", "Post-Materialism score (V-Party)", c ("materialist", "post-\nmaterialist")))



### (2.5.4) Figure A5: Polarization: Wiki, LR, IL, PM ####

# preparing the data
## One row per election and ideology measure holding the *_dist12 value for that
## measure, restricted to elections that needed a second round.
bele_long <- bele %>% 
  filter (r2_needed == 1) %>% 
  select (sample2, wiki_lr_dist12, ideoLR_fa_dist12, ideoIL_fa_dist12, ideoPM_fa_dist12) %>% 
  pivot_longer (cols=wiki_lr_dist12:ideoPM_fa_dist12, names_to="quantity") %>% 
  mutate (
    ## legend entries, in display order
    ideo_measure = case_when (
      quantity == "wiki_lr_dist12" ~ "Wikipedia: Left-Right"
      , quantity == "ideoLR_fa_dist12" ~ "V-Party: Left-Right"
      , quantity == "ideoIL_fa_dist12" ~ "V-Party: (Il)liberalism"
      , quantity == "ideoPM_fa_dist12" ~ "V-Party: Post-Materialism")
    , ideo_measure = factor (ideo_measure, levels=c (
      "Wikipedia: Left-Right", "V-Party: Left-Right", "V-Party: (Il)liberalism", "V-Party: Post-Materialism"))
    ## facet titles, in display order
    , sample = case_when (
      sample2 == "full" ~ "full sample"
      , sample2 == "presi" ~ "presidential elections"
      , sample2 == "gover" ~ "gubernatorial elections"
      , sample2 == "mayor" ~ "mayoral elections"
      , sample2 == "brazil" ~ "subnational (Brazil)"
      , sample2 == "others" ~ "subnational (outside Brazil)")
    , sample = factor (sample, levels = c (
      "full sample", "presidential elections", "gubernatorial elections"
      , "mayoral elections", "subnational (Brazil)", "subnational (outside Brazil)")) )
summary (bele_long)
## TRUE is spelled out: `T` is an ordinary variable that can be reassigned
range (bele_long$value, na.rm=TRUE) ## 0 to 5.2

# drawing the plots
## Density of the distance between first- and second-placed candidates for the two
## ideology measures named in `measures`, one facet per sample.
plotIdeoPolar <- function (measures) {
  ( ggplot (data=bele_long %>% filter (ideo_measure %in% !!measures)
            , aes (x=value, fill=ideo_measure))
    + geom_density (alpha=alpha_dots)
    + scale_x_continuous (
      name="difference b/w 1st and 2nd (absolute)"
      , limits=c (0, 5.2)
      , breaks=seq (0, 5, by=1)
      , labels= seq (0, 5, by=1))
    # + scale_y_continuous (limits=c (0, 1.55), breaks=seq (0, 1.55, by=0.25))
    + scale_fill_manual (name="ideology measure", values=c (col_movie[1], col_movie[5]))
    + facet_wrap (~ sample, ncol=1)
    + theme (
      strip.background=element_rect (fill=col_movie[3])
      , legend.position="bottom"
      , legend.title=element_blank ()
      , legend.text=element_text (size=size_text*2.25)
      , legend.box.margin=margin (-0,-9,-9,-9)) )
}

## the two left-right measures
(pDescIdeoPolarLR <- plotIdeoPolar (
  c ("Wikipedia: Left-Right", "V-Party: Left-Right")))

## the two remaining V-Party dimensions
(pDescIdeoPolarOth <- plotIdeoPolar (
  c ("V-Party: (Il)liberalism", "V-Party: Post-Materialism")))



### (2.5.5) Scatter 1st vs 2nd + reversions ####

## Scatter of the first-placed candidate's score (x) against the runner-up's (y) for
## elections that needed a second round, coloured by whether the runoff reversed the
## first-round order; the dashed line is the 45-degree line. `mapping` supplies the
## two score columns, `axisLimit` the symmetric axis limits and `measureTitle` the
## text that opens both axis titles.
plotIdeo1vs2 <- function (mapping, axisLimit, measureTitle) {
  ( ggplot (data=filter (bele, r2_needed==1), mapping)
    + geom_abline (linetype=2, color=gray_lines)
    + geom_point (alpha=alpha_dots, size=size_bin)
    + scale_x_continuous (
      name=str_c (measureTitle, ": First-placed candidate in first round")
      , limits=c (-axisLimit, axisLimit)
      , breaks=seq (-4, 4, by=1), labels=seq (-4, 4, by=1))
    + scale_y_continuous (
      name=str_c (measureTitle, ": Runner-up in first round")
      , limits=c (-axisLimit, axisLimit)
      , breaks=seq (-4, 4, by=1), labels=seq (-4, 4, by=1))
    + scale_color_manual (
      name="second round reversal"
      , labels=c ("no", "yes")
      , values=c (col_movie[1], col_movie[5]))
    + facet_wrap (~ sample2b, ncol=2)
    + theme (
      strip.background=element_rect (fill=col_movie[3])
      , legend.position="bottom"
      , legend.text=element_text (size=size_text*2.25)
      , legend.box.margin=margin (-9,-9,-9,-9)) )
}

# Figure A8: ideology: Wikipedia
bele %>% 
  filter (r2_needed==1) %>% 
  ungroup () %>% 
  select (wiki_lr_first, wiki_lr_second) %>% 
  range (na.rm=TRUE) ## -2.3 to 1.9
(pDescIdeo1vs2Wiki <- plotIdeo1vs2 (
  mapping=aes (x=wiki_lr_first, y=wiki_lr_second, color=factor (reversion_ele))
  , axisLimit=2.25
  , measureTitle="Left-Right score (Wikipedia)"))

# Figure A9: ideology: LR
bele %>% 
  filter (r2_needed==1) %>% 
  ungroup () %>% 
  select (ideoLR_fa_first, ideoLR_fa_second) %>% 
  range (na.rm=TRUE) ## -3.4 to 3.3
(pDescIdeo1vs2LR <- plotIdeo1vs2 (
  mapping=aes (x=ideoLR_fa_first, y=ideoLR_fa_second, color=factor (reversion_ele))
  , axisLimit=3.5
  , measureTitle="Left-Right score (V-Party)"))



### (2.5.6) Exporting ####

## base figure size in cm
pwid <- 12*0.95
phei <- 30*1.0

# 3 x 2 plots:
## the two-column scatters are 1.5x wider and slightly shorter than the base size
figsIdeoWide <- list (
  "figures/figDescIdeo1vs2Wiki.png" = pDescIdeo1vs2Wiki
  , "figures/figDescIdeo1vs2LR.png" = pDescIdeo1vs2LR)
for (figPath in names (figsIdeoWide)) {
  ggsave (filename=figPath, plot=figsIdeoWide[[figPath]]
          , width=pwid*1.5, height=phei*0.925, units="cm", dpi=600)
}

# "long" plots
## single-column figures with one facet per sample, saved at the base size
figsIdeoLong <- list (
  "figures/figDescIdeoMissingWiki.png" = pDescIdeoMissingWiki
  , "figures/figDescIdeoMissingVParty.png" = pDescIdeoMissingVParty
  , "figures/figDescIdeoPolarLR.png" = pDescIdeoPolarLR
  , "figures/figDescIdeoPolarOth.png" = pDescIdeoPolarOth
  , "figures/figDescIdeoWiki.png" = pDescIdeoWiki
  , "figures/figDescIdeoLR.png" = pDescIdeoLR
  , "figures/figDescIdeoIL.png" = pDescIdeoIL
  , "figures/figDescIdeoPM.png" = pDescIdeoPM)
for (figPath in names (figsIdeoLong)) {
  ggsave (filename=figPath, plot=figsIdeoLong[[figPath]]
          , width=pwid, height=phei, units="cm", dpi=600)
}



#### (2.6) Table A3: Descriptive statistics: differences-in-means around small bandwidths ####

# 2pp sample
## Group sizes and mean outcomes by sample (sample2) and by first_r1, restricted to
## observations whose margin12 is within a small bandwidth. `bwd` records the bandwidth used.

# 2pp sample
bdesc2 <- bcand %>%
  filter (margin12 <= 2) %>%
  group_by (sample2, first_r1) %>%
  summarise (
    n = n ()
    , winner = mean (winner, na.rm=TRUE)
    , share_r2 = mean (share_r2, na.rm=TRUE)
    , bwd = 2)

# 5pp sample
bdesc5 <- bcand %>%
  filter (margin12 <= 5) %>%
  group_by (sample2, first_r1) %>%
  summarise (
    n = n ()
    , winner = mean (winner, na.rm=TRUE)
    , share_r2 = mean (share_r2, na.rm=TRUE)
    , bwd = 5)

# joining and widening for export: one row per sample x bandwidth, with the first_r1==0 and
# first_r1==1 values side by side (suffixes _0 and _1)
bdesc <- bind_rows (bdesc2, bdesc5) %>%
  pivot_wider (
    names_from = "first_r1"
    , values_from = n:share_r2) %>%
  mutate (
    # sanity-check sums, computed on the unrounded means (dropped by the select() below)
    winner_sum = winner_0 + winner_1
    , share_r2_sum = share_r2_0 + share_r2_1 ## fixed: used to add share_r2_0 to itself
    # differences must be computed before the means are turned into formatted strings
    , winner_dif = sprintf ("%.2f", round (winner_1 - winner_0, 2))
    , share_r2_dif = sprintf ("%.2f", round (share_r2_1 - share_r2_0, 2))
    , winner_0 = sprintf ("%.2f", round (winner_0, 2))
    , winner_1 = sprintf ("%.2f", round (winner_1, 2))
    , share_r2_0 = sprintf ("%.2f", round (share_r2_0, 2))
    , share_r2_1 = sprintf ("%.2f", round (share_r2_1, 2))
    # "N- | N+" cell, with the bar wrapped in LaTeX math mode
    , n = str_c (n_0, "$|$", n_1, sep="")
    ) %>% ungroup () %>%
  select (sample2, bwd, n, winner_0:winner_1, winner_dif, share_r2_0:share_r2_1, share_r2_dif)

# visual check: sums of the (rounded) means of the two groups, by row
bdesc %>% mutate (win_sum = as.numeric (winner_0) + as.numeric (winner_1), sh_sum = as.numeric (share_r2_0) + as.numeric (share_r2_1)) ## no discrepancies. Good

# exporting to LaTeX
## body of the table: sample label, N, and the two outcome panels separated by empty spacer columns.
## The six labels are recycled over the rows of bdesc (2pp block first, then 5pp block).
(tabBwd <- cbind (
  c ("full sample", "presidential elections", "gubernatorial elections", "mayoral elections", "subnational (Brazil)", "subnational (outside Brazil)")
  , bdesc$n
  , rep ("", nrow (bdesc))
  , select (bdesc, winner_0:winner_dif)
  , rep ("", nrow (bdesc))
  , select (bdesc, share_r2_0:share_r2_dif) ))

## raw LaTeX inserted around the body: column headers, panel titles and the table note
Header1 <- "\\toprule & & & \\multicolumn{3}{c}{\\textsc{dv}: \\emph{winner} (0/100)} & & \\multicolumn{3}{c}{\\textsc{dv}: \\emph{vote share}$_{\\textsc{r}2}$ (0:100)} \\\\ \\cmidrule{4-6} \\cmidrule{8-10} \n"
Header2 <- "\\multicolumn{1}{l}{(a) 2~pp. bandwidth} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & \\multicolumn{1}{c}{$\\bar{y}^{-}$} & \\multicolumn{1}{c}{$\\bar{y}^{+}$} & \\multicolumn{1}{c}{diff.} & & \\multicolumn{1}{c}{$\\bar{y}^{-}$} & \\multicolumn{1}{c}{$\\bar{y}^{+}$} & \\multicolumn{1}{c}{diff.} \\\\ \\midrule \n"
Header3 <- "[2.0ex] \\multicolumn{10}{l}{(b) 5~pp. bandwidth} \\\\ \\midrule \n"
Bottom1 <- "\\bottomrule \\multicolumn{10}{l}{}
  \\begin{minipage}{12.0cm}~\\\\
  \\footnotesize Number of observations, mean outcome values of bare winners and losers, and differences in means, for observations within (a) 2~percentage points; and (b) 5~percentage points of the threshold, by subsample.
  \\end{minipage}}\\\\"

## where each LaTeX snippet goes: both headers before row 1, the panel (b) title after row 6,
## and the note after the last (12th) row
addtorow <- list (
  pos = list (0, 0, 6, 12)
  , command = c (Header1, Header2, Header3, Bottom1))

## writing the table to disk; cell contents are passed through unsanitized because they
## already contain LaTeX markup, and all rules come from addtorow
print (
  xtable (
    as.matrix (tabBwd)
    , caption="{Differences in means between treatment and control groups, 2~and~5~pp. bandwidths}"
    , label="T:tabDescBwd"
    , align=c("l","l","r","r","r","r","r","r","r","r","r")
    , digits=2)
  , file="tables/tabDescBwd.tex"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center"
  , size="footnotesize"
  , include.rownames=FALSE
  , include.colnames=FALSE
  , hline.after=NULL
  , add.to.row=addtorow
  , sanitize.text.function=function(x){x}
  )




###### (3) RD plots ######

#### (3.1) Getting and storing the info ####

## list of outcome variables + objects to store the results
outcomes <- c ("winner", "share_r2")
samples <- unique (as.character (bcand$sample2))
dist_measure <- c ("wiki_lr_dist12_ntile2", "ideoLR_dist12_ntile2", "ideoIL_dist12_ntile2", "ideoPM_dist12_ntile2")


## helper: QSMV (quantile-spaced, mimicking variability) RD plot with a third-order global
## polynomial and uniform kernel. `data` must contain the columns outcome_tmp and score.
draw_qsmv <- function (data) {
  with (data, rdplot (
    y=outcome_tmp, x=score, p=3, binselect="qsmv", scale=1, kernel="uniform", x.lim=c (-50, 50))) }


## helper: extracts the first three layers of the ggplot stored in an rdplot result and stacks
## them, labelled "bin", "ll" and "rl" respectively, together with identifiers of the plot
## (subset, distance measure, sample and outcome), so that the figures can be rebuilt manually later
extract_qsmv <- function (fit, subset_label, dist_label, sample_label, outcome_label) {
  layers <- ggplot_build (fit$rdplot)$data ## built once, instead of once per layer
  bind_rows (
    layers[[1]] %>% mutate (type="bin")
    , layers[[2]] %>% mutate (type="ll")
    , layers[[3]] %>% mutate (type="rl") ) %>% mutate (
      bin_select="qsmv", poly=3, subset=subset_label, dist_measure=dist_label, sample2=sample_label, outcome=outcome_label ) }


## (3.1.1) loop to get all the plots ####

## one slot per outcome x sample x distance measure; filled in loop order and bound once at the
## end (avoids re-copying the accumulated results at every iteration)
plot_pieces <- vector ("list", length (outcomes) * length (samples) * length (dist_measure))
k <- 0

for (o in seq_along (outcomes)){

  ### selecting the outcomes of interest
  bcand$outcome_tmp <- bcand[[outcomes[o]]]


  ### selecting the sample(s) of interest
  for (s in seq_along (samples)){

    ## selecting the distance (polarization) measure
    for (d in seq_along (dist_measure)) {

      ## specifying the polarization measure
      bcand$dist_tmp <- bcand[[dist_measure[d]]]


      ### drawing the plots

      ## (1) QSMV: quantile-spaced, mimicking variability -> better reflects the actual distribution of the data

      # full sample(s); does not depend on the distance measure, but is stored once per measure
      qsmv_all <- draw_qsmv (filter (bcand, sample2==samples[s]))

      # non-NA samples
      qsmv_nona <- draw_qsmv (filter (bcand, sample2==samples[s] & !is.na (dist_tmp)))

      # polarized elections
      # NOTE(review): value 1 of the *_ntile2 measure is treated as "polarized" and value 2 as
      # "close" -- confirm against how these variables were constructed
      qsmv_pola <- draw_qsmv (filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp==1))

      # close elections
      qsmv_close <- draw_qsmv (filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp==2))


      ## exporting the data we'll use to build the plots manually later
      qsmv_all_p <- extract_qsmv (qsmv_all, "main", dist_measure[d], samples[s], outcomes[o])
      qsmv_nona_p <- extract_qsmv (qsmv_nona, "nona", dist_measure[d], samples[s], outcomes[o])
      qsmv_pola_p <- extract_qsmv (qsmv_pola, "pola", dist_measure[d], samples[s], outcomes[o])
      qsmv_close_p <- extract_qsmv (qsmv_close, "close", dist_measure[d], samples[s], outcomes[o])

      ## storing the values
      k <- k + 1
      plot_pieces[[k]] <- bind_rows (qsmv_all_p, qsmv_nona_p, qsmv_pola_p, qsmv_close_p)
    }}}

## stacking everything, in the order in which it was generated
plot_results <- bind_rows (plot_pieces)


## (3.1.2) updating some variables ####
## Turns the identifiers stored with the plot data into labelled factors, and computes, for every
## plot, the jump between the two fitted lines at the cutoff (x==0) plus the text label showing it.
plot_results2 <- plot_results %>%
  mutate (
    type = factor (type)
    , bin_select = factor (bin_select)
    , sample2 = factor (sample2)
    , subset = factor (subset, levels=c (
      "main", "nona", "pola", "close"))
    , outcome = factor (outcome)

    # renaming the samples
    , sample = case_when (
      sample2 == "full" ~ "full sample"
      , sample2 == "presi" ~ "presidential\nelections"
      , sample2 == "gover" ~ "gubernatorial\nelections"
      , sample2 == "mayor" ~ "mayoral\nelections"
      , sample2 == "brazil" ~ "subnational\n(Brazil)"
      , sample2 == "others" ~ "subnational\n(outside Brazil)")
    , sample = factor (sample, levels = c (
      "full sample", "presidential\nelections", "gubernatorial\nelections"
      , "mayoral\nelections", "subnational\n(Brazil)", "subnational\n(outside Brazil)"))

    # renaming the outcome variable
    , out_var = dplyr::recode (outcome
                        , winner = "winner (0/100)"
                        , share_r2 = "vote share[R2] (0:100)"
                        )
    , out_var = factor (out_var, levels = c (
      "winner (0/100)", "vote share[R2] (0:100)"))

    # renaming the distance measure(s): variable name -> display name, in a single step
    , dist_measure = case_when (
      dist_measure == "wiki_lr_dist12_ntile2" ~ "Wikipedia"
      , dist_measure == "ideoLR_dist12_ntile2" ~ "Left-Right"
      , dist_measure == "ideoIL_dist12_ntile2" ~ "(Il)Liberalism"
      , dist_measure == "ideoPM_dist12_ntile2" ~ "Post-Materialism")
    , dist_measure = factor (dist_measure, levels = c (
      "Wikipedia", "Left-Right", "(Il)Liberalism", "Post-Materialism"))

    # facet title for the heterogeneous-effects plots; "other" for the main and non-NA subsets
    , dist_nam = case_when (
      subset == "close" ~ str_c ("ideologically close (", dist_measure, ")")
      , subset == "pola" ~ str_c ("ideologically polarized (", dist_measure, ")")
      , TRUE ~ "other")
    , dist_nam = factor (dist_nam, levels=c (
      "ideologically polarized (Wikipedia)"
      , "ideologically polarized (Left-Right)"
      , "ideologically polarized ((Il)Liberalism)"
      , "ideologically polarized (Post-Materialism)"
      , "ideologically close (Wikipedia)"
      , "ideologically close (Left-Right)"
      , "ideologically close ((Il)Liberalism)"
      , "ideologically close (Post-Materialism)"
      , "other"))
  ) %>% group_by ( ## to identify each plot uniquely
    # dist_measure is part of the key because dist_nam is "other" for every measure in the
    # "main" and "nona" subsets; without it the "nona" plots of the four measures were pooled
    sample2, dist_measure, dist_nam, subset, outcome, bin_select, poly) %>%
  mutate (
    # value of the left ("ll") and right ("rl") fitted lines at the cutoff; max() just picks
    # the single non-missing value within each plot
    y_below = ifelse (type=="ll" & x==0, y, NA)
    , y_below = max (y_below, na.rm=TRUE)
    , y_above = ifelse (type=="rl" & x==0, y, NA)
    , y_above = max (y_above, na.rm=TRUE)
    , rd_estim = y_above - y_below
    , rd_estim_text = str_c ("\u03c4 = ", sprintf ("%.2f", round (rd_estim, 2)), "pp.") ## tau
  ) %>% ## warning reported. Don't worry (presumably max() finding no value at x==0 in some plot -- verify)
  ungroup () %>%
  # dropping the aesthetics columns that came along from the ggplot layers (if present)
  select (-any_of (c (
    "PANEL", "group", "shape", "colour", "size", "fill", "alpha", "stroke", "flipped_aes", "linetype")))
summary (plot_results2)



### (3.2) Drawing the plots ####

## (3.2.1a) Figure 2: All data, QSMV, winner
(rdp11_all_qsmv3 <- local ({
  # everything belonging to this figure: outcome "winner", "main" subset, QSMV bins, cubic fit
  fig_data <- filter (plot_results2, outcome=="winner", subset=="main", bin_select=="qsmv", poly==3)
  # one text label (jump at the cutoff) per facet
  fig_labels <- fig_data %>% group_by (sample) %>% summarise (rd_estim_text=unique (rd_estim_text))

  # binned means as dots, fitted lines on either side of the cutoff, one panel per sample
  ggplot (filter (fig_data, type=="bin"), aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_cutoff) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    geom_line (data=filter (fig_data, type=="ll"), aes (x=x, y=y), col=col_line) +
    geom_line (data=filter (fig_data, type=="rl"), aes (x=x, y=y), col=col_line) +
    facet_wrap ( ~ sample, ncol=2) +
    geom_text (data=fig_labels
               , mapping=aes (x=place_text_x, y=place_text_y, label=rd_estim_text), size=size_text ) +
    scale_x_continuous (limits=c (-35, 35), breaks=seq (-50, 50, by=10)) +
    scale_y_continuous (limits=c (0, 100), breaks=seq (0, 100, by=20)) +
    xlab (expression (first~round~margin~"(%)")) +
    ylab (expression (winner~"(%)")) +
    theme (strip.background=element_rect (fill=col_movie[3])
           , axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0)))
}))


## (3.2.1b) Figure A11: All data, QSMV, share_r2
(rdp12_all_qsmv3 <- local ({
  # everything belonging to this figure: outcome "share_r2", "main" subset, QSMV bins, cubic fit
  fig_data <- filter (plot_results2, outcome=="share_r2", subset=="main", bin_select=="qsmv", poly==3)
  # one text label (jump at the cutoff) per facet
  fig_labels <- fig_data %>% group_by (sample) %>% summarise (rd_estim_text=unique (rd_estim_text))

  # binned means as dots, fitted lines on either side of the cutoff, one panel per sample
  ggplot (filter (fig_data, type=="bin"), aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_cutoff) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    geom_line (data=filter (fig_data, type=="ll"), aes (x=x, y=y), col=col_line) +
    geom_line (data=filter (fig_data, type=="rl"), aes (x=x, y=y), col=col_line) +
    facet_wrap ( ~ sample, ncol=2) +
    geom_text (data=fig_labels
               , mapping=aes (x=place_text_x, y=place_text_y3, label=rd_estim_text), size=size_text ) +
    scale_x_continuous (limits=c (-35, 35), breaks=seq (-50, 50, by=10)) +
    scale_y_continuous (limits=c (30, 70), breaks=seq (30, 70, by=10)) +
    xlab (expression (first~round~margin~"(%)")) +
    # "R2" set as a subscript, as in the other vote-share RD figures of this section
    ylab (expression (vote~share[R2]~"(%)")) +
    theme (strip.background=element_rect (fill=col_movie[3])
           , axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0)))
}))


## (3.2.2a) Figure 3: Heterogeneous Wikipedia, QSMV, winner
(rdp21_hetWiki_qsmv3 <- local ({
  # outcome "winner", "close" and "pola" subsets of the Wikipedia measure, QSMV bins, cubic fit
  fig_data <- filter (plot_results2, outcome=="winner", subset%in%c("close","pola"), dist_measure=="Wikipedia", bin_select=="qsmv", poly==3)
  # one text label (jump at the cutoff) per sample x subset panel
  fig_labels <- fig_data %>% group_by (sample, dist_nam) %>% summarise (rd_estim_text=unique (rd_estim_text))

  # samples in rows, polarized / close subsets in columns
  ggplot (filter (fig_data, type=="bin"), aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_cutoff) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    geom_line (data=filter (fig_data, type=="ll"), aes (x=x, y=y), col=col_line) +
    geom_line (data=filter (fig_data, type=="rl"), aes (x=x, y=y), col=col_line) +
    facet_grid (sample ~ dist_nam) +
    geom_text (data=fig_labels
               , mapping=aes (x=place_text_x, y=place_text_y, label=rd_estim_text), size=size_text ) +
    scale_x_continuous (limits=c (-35, 35), breaks=seq (-50, 50, by=10)) +
    scale_y_continuous (limits=c (0, 100), breaks=seq (0, 100, by=20)) +
    xlab (expression (first~round~margin~"(%)")) +
    ylab (expression (winner~"(%)")) +
    theme (strip.background=element_rect (fill=col_movie[3])
           , axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0)))
}))


## (3.2.2b) Figure A12: Heterogeneous Wikipedia, QSMV, share_r2
(rdp22_hetWiki_qsmv3 <- local ({
  # outcome "share_r2", "close" and "pola" subsets of the Wikipedia measure, QSMV bins, cubic fit
  fig_data <- filter (plot_results2, outcome=="share_r2", subset%in%c("close","pola"), dist_measure=="Wikipedia", bin_select=="qsmv", poly==3)
  # one text label (jump at the cutoff) per sample x subset panel
  fig_labels <- fig_data %>% group_by (sample, dist_nam) %>% summarise (rd_estim_text=unique (rd_estim_text))

  # samples in rows, polarized / close subsets in columns
  ggplot (filter (fig_data, type=="bin"), aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_cutoff) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    geom_line (data=filter (fig_data, type=="ll"), aes (x=x, y=y), col=col_line) +
    geom_line (data=filter (fig_data, type=="rl"), aes (x=x, y=y), col=col_line) +
    facet_grid (sample ~ dist_nam) +
    geom_text (data=fig_labels
               , mapping=aes (x=place_text_x, y=place_text_y3, label=rd_estim_text), size=size_text ) +
    scale_x_continuous (limits=c (-35, 35), breaks=seq (-50, 50, by=10)) +
    scale_y_continuous (limits=c (30, 70), breaks=seq (30, 70, by=10)) +
    xlab (expression (first~round~margin~"(%)")) +
    ylab (expression (vote~share[R2]~"(%)")) +
    theme (strip.background=element_rect (fill=col_movie[3])
           , axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0)))
}))


## (3.2.3a) Figure A10: Heterogeneous LR, QSMV, winner
(rdp31_hetLR_qsmv3 <- local ({
  # outcome "winner", "close" and "pola" subsets of the Left-Right measure, QSMV bins, cubic fit
  fig_data <- filter (plot_results2, outcome=="winner", subset%in%c("close","pola"), dist_measure=="Left-Right", bin_select=="qsmv", poly==3)
  # one text label (jump at the cutoff) per sample x subset panel
  fig_labels <- fig_data %>% group_by (sample, dist_nam) %>% summarise (rd_estim_text=unique (rd_estim_text))

  # samples in rows, polarized / close subsets in columns
  ggplot (filter (fig_data, type=="bin"), aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_cutoff) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    geom_line (data=filter (fig_data, type=="ll"), aes (x=x, y=y), col=col_line) +
    geom_line (data=filter (fig_data, type=="rl"), aes (x=x, y=y), col=col_line) +
    facet_grid (sample ~ dist_nam) +
    geom_text (data=fig_labels
               , mapping=aes (x=place_text_x, y=place_text_y, label=rd_estim_text), size=size_text ) +
    scale_x_continuous (limits=c (-35, 35), breaks=seq (-50, 50, by=10)) +
    scale_y_continuous (limits=c (0, 100), breaks=seq (0, 100, by=20)) +
    xlab (expression (first~round~margin~"(%)")) +
    ylab (expression (winner~"(%)")) +
    theme (strip.background=element_rect (fill=col_movie[3])
           , axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0)))
}))


## (3.2.3b) Figure A13: Heterogeneous LR, QSMV, share_r2
(rdp32_hetLR_qsmv3 <- local ({
  # outcome "share_r2", "close" and "pola" subsets of the Left-Right measure, QSMV bins, cubic fit
  fig_data <- filter (plot_results2, outcome=="share_r2", subset%in%c("close","pola"), dist_measure=="Left-Right", bin_select=="qsmv", poly==3)
  # one text label (jump at the cutoff) per sample x subset panel
  fig_labels <- fig_data %>% group_by (sample, dist_nam) %>% summarise (rd_estim_text=unique (rd_estim_text))

  # samples in rows, polarized / close subsets in columns
  ggplot (filter (fig_data, type=="bin"), aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_cutoff) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    geom_line (data=filter (fig_data, type=="ll"), aes (x=x, y=y), col=col_line) +
    geom_line (data=filter (fig_data, type=="rl"), aes (x=x, y=y), col=col_line) +
    facet_grid (sample ~ dist_nam) +
    geom_text (data=fig_labels
               , mapping=aes (x=place_text_x, y=place_text_y3, label=rd_estim_text), size=size_text ) +
    scale_x_continuous (limits=c (-35, 35), breaks=seq (-50, 50, by=10)) +
    scale_y_continuous (limits=c (30, 70), breaks=seq (30, 70, by=10)) +
    xlab (expression (first~round~margin~"(%)")) +
    ylab (expression (vote~share[R2]~"(%)")) +
    theme (strip.background=element_rect (fill=col_movie[3])
           , axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0)))
}))



### (3.3) Exporting the plots ####

## (3.3.1) main results
## page dimensions (in cm) for the two full-data RD plots
pwid <- 17*1.10
phei <- 17*1.15
invisible (Map (
  function (path, fig) ggsave (path, fig, width=pwid, height=phei, units="cm")
  , c ("figures/figRDPlotWinner.png"
       , "figures/figRDPlotShare.png")
  , list (rdp11_all_qsmv3
          , rdp12_all_qsmv3) ))


## (3.3.2) heterogeneous effects
## larger page, since these figures have one row of panels per sample
pwid <- 17*1.5
phei <- 17*1.85
invisible (Map (
  function (path, fig) ggsave (path, fig, width=pwid, height=phei, units="cm")
  , c ("figures/figRDPlotHetWikiWinner.png"
       , "figures/figRDPlotHetWikiShare.png"
       , "figures/figRDPlotHetLRWinner.png"
       , "figures/figRDPlotHetLRShare.png")
  , list (rdp21_hetWiki_qsmv3
          , rdp22_hetWiki_qsmv3
          , rdp31_hetLR_qsmv3
          , rdp32_hetLR_qsmv3) ))




###### (4) RD analyses ######

#### (4.1) Estimating all the models and storing the info of interest ####

## objects to store the values of interest
## dimensions the estimation loop iterates over
outcomes <- c (
  "winner"
  , "share_r2")
dist_measure <- c (
  "wiki_lr_dist12_ntile2"
  , "ideoLR_dist12_ntile2"
  , "ideoIL_dist12_ntile2"
  , "ideoPM_dist12_ntile2")
samples <- unique (as.character (bcand$sample2))

## empty containers; we need these to "store" the results
rd_results_exp <- NULL
rd_results_inc <- NULL
rd_results_ideo <- NULL
rd_results_main <- NULL


## loop to get all the results ####
for (o in 1:length (outcomes)){
  
  ## selecting the outcomes of interest
  bcand$outcome_tmp <- unlist (bcand[,which (colnames (bcand) == outcomes[o])])
  
  ## selecting the sample(s) of interest
  for (s in 1:n_distinct (bcand$sample2)){
    
    ## selecting the distance (polarization) measure
    for (d in 1:length (dist_measure)) {
      
      ## specifying the polarization measure
      bcand$dist_tmp <- unlist (bcand[,which (colnames (bcand) == dist_measure[d])])
      
      ## selecting the bandwidth selection procedure
      for (b in c ("mserd", "cerrd")) {
        
        ## selecting the polynomial degree
        for (p in 1:2) {
          
          
          ## main results
          if (dist_measure[d] %in% c ("wiki_lr_dist12_ntile2", "ideoLR_dist12_ntile2")){
            
          ## all data
          rd_main <- with (
            filter (bcand, sample2==samples[s]
            ), rdrobust (
              y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
          rd_main_cont <- with (
            filter (bcand, sample2==samples[s]
            ), rdrobust (
              y=outcome_tmp, x=score, bwselect=b, covs=wiki_lr + incParty + incCandidate, cluster=ele_id, p=p, q=p+1))
          
          # SD of outcome in the control group within (conventional) RD bandwidth
          sd_main_c <- sd (filter (bcand, sample2==samples[s] & score<0 & abs(score)<=rd_main$bws[1,1])$outcome_tmp, na.rm=TRUE)
          sd_main_cont_c <- sd (filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate) & score<0 & abs(score)<=rd_main_cont$bws[1,1])$outcome_tmp, na.rm=TRUE)
          
          # power
          pow_main01 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s]) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=sd_main_c
            , cluster=filter (bcand, sample2==samples[s])$ele_id)
          pow_main02 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s]) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=sd_main_c/2
            , cluster=filter (bcand, sample2==samples[s])$ele_id)
          pow_main03 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s]) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=rd_main$Estimate[1]
            , cluster=filter (bcand, sample2==samples[s])$ele_id)
          
          pow_main_cont01 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate)) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=sd_main_cont_c
            , cluster=filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate))$ele_id)
          pow_main_cont02 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate)) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=sd_main_cont_c/2
            , cluster=filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate))$ele_id)
          pow_main_cont03 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate)) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=rd_main_cont$Estimate[1]
            , cluster=filter (bcand, sample2==samples[s] & !is.na(wiki_lr) & !is.na(incParty) & !is.na(incCandidate))$ele_id)
          
          
          ## excluding NA's from ideology measure
          rd_nona <- with (
            filter (bcand, sample2==samples[s] & !is.na (dist_tmp)
            ), rdrobust (
              y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
          
          # SD of outcome in the control group within (conventional) RD bandwidth
          sd_nona_c <- sd (filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & score<0 & abs(score)<=rd_nona$bws[1,1])$outcome_tmp, na.rm=TRUE)
          
          # power
          pow_nona01 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s] & !is.na (dist_tmp)) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=sd_nona_c
            , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp))$ele_id)
          pow_nona02 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s] & !is.na (dist_tmp)) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=sd_nona_c/2
            , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp))$ele_id)
          pow_nona03 <- rdpower (data=as.data.frame (
            filter (bcand, sample2==samples[s] & !is.na (dist_tmp)) %>% select (outcome_tmp, score))
            , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
            , tau=rd_nona$Estimate[1]
            , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp))$ele_id)
          }
          
          
          ## incumbent participation
          if (dist_measure[d] == "wiki_lr_dist12_ntile2"# & samples[s] != "others"
              ){
            rd_inone <- with (
              filter (bcand, sample2==samples[s] & incCandidate_none == 1
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            
            # SD of outcome in the control group within (conventional) RD bandwidth
            sd_inone_c <- sd (filter (bcand, sample2==samples[s] & incCandidate_none == 1 & score<0 & abs(score)<=rd_inone$bws[1,1])$outcome_tmp, na.rm=TRUE)
            
            # power
            pow_inone01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & incCandidate_none == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_inone_c
              , cluster=filter (bcand, sample2==samples[s] & incCandidate_none == 1)$ele_id)
            pow_inone02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & incCandidate_none == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_inone_c/2
              , cluster=filter (bcand, sample2==samples[s] & incCandidate_none == 1)$ele_id)
            pow_inone03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & incCandidate_none == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_inone$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & incCandidate_none == 1)$ele_id)
            }
            
          
          ## splitting by ideology
          if (b == "mserd" & p == 1){
            
            ## polarized elections
            rd_pola <- with (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            
            # SD of outcome in the control group within (conventional) RD bandwidth
            sd_pola_c <- sd (filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1 & score<0 & abs(score)<=rd_pola$bws[1,1])$outcome_tmp, na.rm=TRUE)
            
            # power
            pow_pola01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_pola_c
              , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1)$ele_id)
            pow_pola02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_pola_c/2
              , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1)$ele_id)
            pow_pola03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_pola$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 1)$ele_id)
            
            ## (ideologically) close elections
            rd_close <- with (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            
            # SD of outcome in the control group within (conventional) RD bandwidth
            sd_close_c <- sd (filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2 & score<0 & abs(score)<=rd_close$bws[1,1])$outcome_tmp, na.rm=TRUE)
            
            # power
            pow_close01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_close_c
              , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2)$ele_id)
            pow_close02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_close_c/2
              , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2)$ele_id)
            pow_close03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_close$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & !is.na (dist_tmp) & dist_tmp == 2)$ele_id)
            }
          
          
          ## splitting by experience (Brazil-only)
          ## Runs only for the "mayor" and "brazil" samples, the "wiki_lr_dist12_ntile2" distance
          ## measure, the "mserd" bandwidth selector and p == 1. For each of four experience subsets
          ## it produces: an RD fit (rd_e*), the control-side outcome SD (sd_e*_c) and three power
          ## calculations (pow_e*01-03).
          if (samples[s] %in% c ("mayor", "brazil") & dist_measure[d] == "wiki_lr_dist12_ntile2" & b == "mserd" & p == 1){
            
            # RD fits, one per subset; all use the same outcome, running variable and clustering
            # subset 1: rows flagged exp_bra_none == 1
            rd_enone <- with (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            # subset 2: rows flagged exp_bra_none == 1 OR exp_bra_both == 1 (the NA check is on
            # exp_bra_both only) -- presumably the "neither/both" row of the experience table
            rd_eboth <- with (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1)
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            # subset 3: rows flagged exp_bra_first == 1
            rd_efirst <- with (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            # subset 4: rows flagged exp_bra_second == 1
            rd_esecond <- with (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1
              ), rdrobust (
                y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
            
            # SD of outcome in the control group within (conventional) RD bandwidth
            # (control group = rows with score<0; each line reuses the bandwidth of the matching fit, bws[1,1])
            sd_enone_c <- sd (filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1 & score<0 & abs(score)<=rd_enone$bws[1,1])$outcome_tmp, na.rm=TRUE)
            sd_eboth_c <- sd (filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1) & score<0 & abs(score)<=rd_eboth$bws[1,1])$outcome_tmp, na.rm=TRUE)
            sd_efirst_c <- sd (filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1 & score<0 & abs(score)<=rd_efirst$bws[1,1])$outcome_tmp, na.rm=TRUE)
            sd_esecond_c <- sd (filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1 & score<0 & abs(score)<=rd_esecond$bws[1,1])$outcome_tmp, na.rm=TRUE)
            
            # power
            # for every subset, three rdpower calls that differ only in the hypothesized effect (tau):
            #   01 -> one control-group SD; 02 -> half of that SD; 03 -> the RD point estimate itself
            # p=1 is hard-coded in these calls, which agrees with the loop's p because this branch requires p == 1
            # -- subset 1 (exp_bra_none)
            pow_enone01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_enone_c
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1)$ele_id)
            pow_enone02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_enone_c/2
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1)$ele_id)
            pow_enone03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_enone$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_none) & exp_bra_none == 1)$ele_id)
            
            # -- subset 2 (exp_bra_none or exp_bra_both)
            pow_eboth01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1)) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_eboth_c
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1))$ele_id)
            pow_eboth02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1)) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_eboth_c/2
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1))$ele_id)
            pow_eboth03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1)) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_eboth$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_both) & (exp_bra_none == 1 | exp_bra_both == 1))$ele_id)
            
            # -- subset 3 (exp_bra_first)
            pow_efirst01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_efirst_c
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1)$ele_id)
            pow_efirst02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_efirst_c/2
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1)$ele_id)
            pow_efirst03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_efirst$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_first) & exp_bra_first == 1)$ele_id)
            
            # -- subset 4 (exp_bra_second)
            pow_esecond01 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_esecond_c
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1)$ele_id)
            pow_esecond02 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=sd_esecond_c/2
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1)$ele_id)
            pow_esecond03 <- rdpower (data=as.data.frame (
              filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1) %>% select (outcome_tmp, score))
              , cutoff=0, alpha=0.05, level=95, p=1, q=p+1, covs=NULL, plot=FALSE
              , tau=rd_esecond$Estimate[1]
              , cluster=filter (bcand, sample2==samples[s] & !is.na(exp_bra_second) & exp_bra_second == 1)$ele_id)
            }
          
          
          ## storing everything
          ## Every appended row is a character vector with 16 fields, in this order:
          ## sample, distance-measure label, subset, covariates, outcome, bandwidth selector,
          ## polynomial order, the five values returned by extract_rd() (estimate, CI, p-value,
          ## bandwidth, effective N), the control-group SD, and the three power figures
          ## (against one SD, half an SD, and the point estimate).
          ## The distance-measure label is picked by position d from c("wiki","LR","IL","PM").
          ## Rows are appended in loop order; the tables built after the loop pair these rows
          ## with fixed label vectors by position, so this order matters.
          
          # unconditional results (full sample, non-missing-ideology sample, and with controls):
          # stored only for the two listed distance measures; rd_main, rd_nona, rd_main_cont and
          # their sd_/pow_ companions are computed earlier in the loop body
          if (dist_measure[d] %in% c ("wiki_lr_dist12_ntile2", "ideoLR_dist12_ntile2")){
            rd_results_main <- rbind (
              rd_results_main
              , c (samples[s], c("wiki","LR","IL","PM")[d], "main", "none", outcomes[o], b, p, extract_rd (rd_main), sprintf ("%.2f", round (sd_main_c, 2)), sprintf ("%.2f", round (pow_main01$power.rbc, 2)), sprintf ("%.2f", round (pow_main02$power.rbc, 2)), sprintf ("%.2f", round (pow_main03$power.rbc, 2)))
              , c (samples[s], c("wiki","LR","IL","PM")[d], "nona", "none", outcomes[o], b, p, extract_rd (rd_nona), sprintf ("%.2f", round (sd_nona_c, 2)), sprintf ("%.2f", round (pow_nona01$power.rbc, 2)), sprintf ("%.2f", round (pow_nona02$power.rbc, 2)), sprintf ("%.2f", round (pow_nona03$power.rbc, 2)))
              , c (samples[s], c("wiki","LR","IL","PM")[d], "main", "all controls", outcomes[o], b, p, extract_rd (rd_main_cont), sprintf ("%.2f", round (sd_main_cont_c, 2)), sprintf ("%.2f", round (pow_main_cont01$power.rbc, 2)), sprintf ("%.2f", round (pow_main_cont02$power.rbc, 2)), sprintf ("%.2f", round (pow_main_cont03$power.rbc, 2)))
            )
          }
          
          # "i_none" results: stored for the Wikipedia distance measure only; the exclusion of
          # the "others" sample is commented out, so every sample is kept
          if (dist_measure[d] == "wiki_lr_dist12_ntile2" #& samples[s] != "others"
              ){
            rd_results_inc <- rbind (
              rd_results_inc
              , c (samples[s], c("wiki","LR","IL","PM")[d], "i_none", "none", outcomes[o], b, p, extract_rd (rd_inone), sprintf ("%.2f", round (sd_inone_c, 2)), sprintf ("%.2f", round (pow_inone01$power.rbc, 2)), sprintf ("%.2f", round (pow_inone02$power.rbc, 2)), sprintf ("%.2f", round (pow_inone03$power.rbc, 2)))
              )
          }
          
          # polarized / close results: stored for every distance measure, but only for the
          # "mserd" selector with p == 1; always appended as a (pola, close) pair, in that order
          if (b == "mserd" & p == 1){
            rd_results_ideo <- rbind (
              rd_results_ideo
              , c (samples[s], c("wiki","LR","IL","PM")[d], "pola", "none", outcomes[o], b, p, extract_rd (rd_pola), sprintf ("%.2f", round (sd_pola_c, 2)), sprintf ("%.2f", round (pow_pola01$power.rbc, 2)), sprintf ("%.2f", round (pow_pola02$power.rbc, 2)), sprintf ("%.2f", round (pow_pola03$power.rbc, 2)))
              , c (samples[s], c("wiki","LR","IL","PM")[d], "close", "none", outcomes[o], b, p, extract_rd (rd_close), sprintf ("%.2f", round (sd_close_c, 2)), sprintf ("%.2f", round (pow_close01$power.rbc, 2)), sprintf ("%.2f", round (pow_close02$power.rbc, 2)), sprintf ("%.2f", round (pow_close03$power.rbc, 2)))
            )
            }
          
          # experience results: same condition as the block that computes them ("mayor"/"brazil"
          # samples, Wikipedia measure, "mserd", p == 1); appended in the fixed order
          # e_none, e_both, e_first, e_second
          if (samples[s] %in% c ("mayor", "brazil") & dist_measure[d] == "wiki_lr_dist12_ntile2" & b == "mserd" & p == 1){
            rd_results_exp <- rbind (
              rd_results_exp
              , c (samples[s], c("wiki","LR","IL","PM")[d], "e_none", "none", outcomes[o], b, p, extract_rd (rd_enone), sprintf ("%.2f", round (sd_enone_c, 2)), sprintf ("%.2f", round (pow_enone01$power.rbc, 2)), sprintf ("%.2f", round (pow_enone02$power.rbc, 2)), sprintf ("%.2f", round (pow_enone03$power.rbc, 2)))
              , c (samples[s], c("wiki","LR","IL","PM")[d], "e_both", "none", outcomes[o], b, p, extract_rd (rd_eboth), sprintf ("%.2f", round (sd_eboth_c, 2)), sprintf ("%.2f", round (pow_eboth01$power.rbc, 2)), sprintf ("%.2f", round (pow_eboth02$power.rbc, 2)), sprintf ("%.2f", round (pow_eboth03$power.rbc, 2)))
              , c (samples[s], c("wiki","LR","IL","PM")[d], "e_first", "none", outcomes[o], b, p, extract_rd (rd_efirst), sprintf ("%.2f", round (sd_efirst_c, 2)), sprintf ("%.2f", round (pow_efirst01$power.rbc, 2)), sprintf ("%.2f", round (pow_efirst02$power.rbc, 2)), sprintf ("%.2f", round (pow_efirst03$power.rbc, 2)))
              , c (samples[s], c("wiki","LR","IL","PM")[d], "e_second", "none", outcomes[o], b, p, extract_rd (rd_esecond), sprintf ("%.2f", round (sd_esecond_c, 2)), sprintf ("%.2f", round (pow_esecond01$power.rbc, 2)), sprintf ("%.2f", round (pow_esecond02$power.rbc, 2)), sprintf ("%.2f", round (pow_esecond03$power.rbc, 2)))
            )
          }
          }}}}}
## Stack the four result sets into one data frame, name its 16 columns, and recode the
## identifying columns as factors with a fixed level order. The final sort by outcome is
## what lets the tables below attach their row labels by position.
rd_results <- rbind(rd_results_main, rd_results_ideo, rd_results_inc, rd_results_exp) %>%
  as.data.frame() %>%
  setNames(c(
    "sample2", "dist_measure", "subset", "covs", "outcome", "bwdselect", "poly"
    , "est", "ci", "pval", "bwd", "n"
    , "sd_c", "pow_sd", "pow_sd2", "pow_beta"
  )) %>%
  mutate(
    sample2 = factor(sample2, levels = c("full", "presi", "gover", "mayor", "brazil", "others"))
    , dist_measure = factor(dist_measure, levels = c("wiki", "LR", "IL", "PM"))
    , subset = factor(subset, levels = c(
      "main", "nona", "pola", "close", "i_none", "e_none", "e_both", "e_first", "e_second"))
    , covs = factor(covs, levels = c("none", "all controls"))
    , outcome = factor(outcome, levels = c("winner", "share_r2"))
    , bwdselect = factor(bwdselect, levels = c("mserd", "cerrd"))
    , poly = as.numeric(poly)
    , empty = ""  # blank spacer column placed between N and the SD/power columns in every table
  ) %>%
  arrange(outcome)

## quick look at the assembled results
summary(rd_results)
head(rd_results)




#### (4.2) Tables (I): Unconditional effects ####

### (4.2.1) Creating the labels of interest -> we'll use the same for all tables ####
## Row labels: the six samples, repeated once for each of the two outcome panels
rows_main <- rep (
  c ("full sample", "presidential", "gubernatorial", "mayoral", "subnational (Brazil)", "subnational ($\\neg$ Brazil)")
  , times=2)

## LaTeX header rows for the 11-column tables (written as plain string literals)
# top rule plus the "power against" spanner over columns 9-11
Header1 <- "\\toprule & & & & & & & & \\multicolumn{3}{c}{power against}\\\\ \\cmidrule{9-11} \n"
# column titles; doubles as the heading of panel (a)
Header2 <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{winner} (0/100)} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & 
\\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
# heading of panel (b)
Header3 <- "[2.0ex] \\multicolumn{11}{l}{(b) \\textsc{dv}: \\emph{vote share}$_{\\textsc{r}2}$ (0:100)} \\\\ \\midrule \n"



### (4.2.2) Table 2: Main results ####
## Baseline rows: Wikipedia measure, "main" subset, no covariates, "mserd" selector, p = 1.
## The outer parentheses print the table as well as storing it.
(tab_main <- rd_results %>% 
    filter (dist_measure=="wiki" & subset=="main" & covs=="none" & bwdselect=="mserd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_main, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{13.75cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  Observations are clustered by election.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 6, note after row 12
addtorow <- list (
  pos=list (0, 0, 6, 12)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_main
    , caption="{\\textsc{rd} estimates of first-round advantage on second-round outcomes}"
    , label="T:main"
    , align=c ("l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabMainFull.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.2.3) Table A7: Non-NA sample ####
## "nona" rows for the Wikipedia and V-Party (LR) measures, sorted so that all Wikipedia
## rows come first. rows_main is shorter than the filtered result and is recycled by cbind.
(tab_nona <- rd_results %>% 
    filter (dist_measure %in% c ("wiki", "LR") & subset=="nona" & covs=="none" & bwdselect=="mserd" & poly==1) %>% 
    arrange (dist_measure) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_main, .))

## panel headings specific to this four-panel table
Header1b <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{winner} (0/100)} & \\multicolumn{10}{c}{Ideology: Wikipedia} \\\\ \\midrule \n"
Header1c <- "[2.5ex] \\midrule \\multicolumn{1}{l}{(c) \\textsc{dv}: \\emph{winner} (0/100)} & \\multicolumn{10}{c}{Ideology: \\textsc{v}-\\textsc{p}arty} \\\\ \\midrule \n"
Header2b <- "& \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & 
\\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
Header3b <- "[2.0ex] \\multicolumn{11}{l}{(d) \\textsc{dv}: \\emph{vote share}$_{\\textsc{r}2}$ (0:100)} \\\\ \\midrule \n"

## table note
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{13.75cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections (a) requiring a runoff and with (b) nonmissing data on the Left-Right ideology of the top two placed candidates, measured either using Wikipedia or \\textsc{v}-\\textsc{p}arty.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample sizes.
  \\end{minipage}}\\\\"

## extra LaTeX rows: three header rows before row 1, one panel heading after rows 6, 12
## and 18, and the note after row 24
addtorow <- list (
  pos=list (0, 0, 0, 6, 12, 18, 24)
  , command=c (Header1, Header2b, Header1b, Header3, Header1c, Header3b, Bottom1))

## write the table to disk
print (
  xtable (
    tab_nona
    , caption="{\\textsc{rd} estimates: Samples with nonmissing ideology scores}"
    , label="T:nonmissing"
    , align=c ("l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabRobNonmissing.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.2.4) Table A8: Including controls ####
## Same selection as the baseline table, except that covs is "all controls"
(tab_controls <- rd_results %>% 
    filter (dist_measure=="wiki" & subset=="main" & covs=="all controls" & bwdselect=="mserd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_main, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{13.75cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections (a) requiring a runoff and with (b) nonmissing data on the Wikipedia Left-Right ideology of the top two placed candidates.
  All specifications control for (a) the Left-Right Wikipedia ideology; (b) partisan incumbency status; and (c) individual incumbency status of the top two placed candidates.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample sizes.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 6, note after row 12
addtorow <- list (
  pos=list (0, 0, 6, 12)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_controls
    , caption="{\\textsc{rd} estimates: Including controls}"
    , label="T:controls"
    , align=c ("l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabRobControls.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.2.5) Table A9: CER-optimal bandwidth ####
## Same selection as the baseline table, except that the bandwidth selector is "cerrd"
(tab_cer <- rd_results %>% 
    filter (dist_measure=="wiki" & subset=="main" & covs=="none" & bwdselect=="cerrd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_main, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{13.75cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{cer}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample sizes.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 6, note after row 12
addtorow <- list (
  pos=list (0, 0, 6, 12)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_cer
    , caption="{\\textsc{rd} estimates: \\textsc{cer}-optimal bandwidths}"
    , label="T:cer"
    , align=c ("l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabRobCER.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.2.6) Table A10: 2nd-order polynomials ####
## Same selection as the baseline table, except that the polynomial order is 2
(tab_poly <- rd_results %>% 
    filter (dist_measure=="wiki" & subset=="main" & covs=="none" & bwdselect=="mserd" & poly==2) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_main, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{13.75cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate second-order polynomial regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample sizes.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 6, note after row 12
addtorow <- list (
  pos=list (0, 0, 6, 12)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_poly
    , caption="{\\textsc{rd} estimates: Second-order polynomials}"
    , label="T:poly2"
    , align=c ("l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabRobPoly2.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )




#### (4.3) Tables (II): Heterogeneous effects ####

### (4.3.1) Creating the labels of interest ####
## Row labels for the heterogeneity tables; each vector covers both outcome panels
# two rows per sample (the second label is blank); "[0.75ex]" adds vertical space before a new sample
rows_het <- rep (
  c ("full sample", "", "[0.75ex] presidential", "", "[0.75ex] gubernatorial", "", "[0.75ex] mayoral", "", "[0.75ex] subnational (Brazil)", "", "[0.75ex] subnational ($\\neg$ Brazil)", "")
  , times=2)
# one row per sample
rows_het_inc <- rep (
  c ("full sample", "[0.75ex] presidential", "[0.75ex] gubernatorial", "[0.75ex] mayoral", "[0.75ex] subnational (Brazil)", "[0.75ex] subnational ($\\neg$ Brazil)")
  , times=2)
# four rows for each of the two Brazilian samples
rows_het_bra <- rep (
  c ("mayoral (brazil)", "", "", "", "[0.75ex] subnational (Brazil)", "", "", "")
  , times=2)
# second label column: polarized/close pairs, or the four experience categories
rows_dist <- rep (c ("polarized", "close"), times=12)
rows_visib_bra <- rep (c ("neither", "neither/both", "first", "second"), times=4)

## LaTeX header rows for the 12-column tables (written as plain string literals)
# top rule plus the "power against" spanner over columns 10-12
Header1 <- "\\toprule & & & & & & & & & \\multicolumn{3}{c}{power against}\\\\ \\cmidrule{10-12} \n"
# column titles; the three variants differ only in the title of the second column
Header2 <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{winner} (0/100)} & \\multicolumn{1}{c}{id. distance} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
Header2b <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{winner} (0/100)} & & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
Header2c <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{winner} (0/100)} & \\multicolumn{1}{c}{experienced} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
# heading of panel (b)
Header3 <- "[2.0ex] \\multicolumn{12}{l}{(b) \\textsc{dv}: \\emph{vote share}$_{\\textsc{r}2}$ (0:100)} \\\\ \\midrule \n"


### (4.3.2) Table 3: Ideology: Left-Right (Wikipedia) ####
## Polarized vs. close rows for the Wikipedia measure, with two label columns in front
## (sample, then polarized/close)
(tab_hetWiki <- rd_results %>% 
    filter (dist_measure=="wiki" & subset %in% c ("pola", "close") & covs=="none" & bwdselect=="mserd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_het, rows_dist, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.0cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Polarized (respectively, close) elections are those in which the absolute ideological distance along the Wikipedia Left-Right dimension between the top-two vote getters in the first round was larger (smaller) than the median for each sample.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 12, note after row 24
addtorow <- list (
  pos=list (0, 0, 12, 24)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_hetWiki
    , caption="{Heterogeneous effects: Left-Right ideological distance (Wikipedia)}"
    , label="T:hetWiki"
    , align=c ("l", "l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabHetWiki.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.3.3) Table A13: Ideology: Left-Right (V-Party) ####
## Polarized vs. close rows for the V-Party Left-Right ("LR") measure, with two label
## columns in front (sample, then polarized/close)
(tab_hetLR <- rd_results %>% 
    filter (dist_measure=="LR" & subset %in% c ("pola", "close") & covs=="none" & bwdselect=="mserd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_het, rows_dist, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.0cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Polarized (respectively, close) elections are those in which the absolute ideological distance along the \\textsc{v}-\\textsc{p}arty Left-Right dimension between the top-two vote getters in the first round was larger (smaller) than the median for each sample.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 12, note after row 24
addtorow <- list (
  pos=list (0, 0, 12, 24)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_hetLR
    , caption="{Heterogeneous effects: Left-Right ideological distance (\\textsc{v}-\\textsc{p}arty)}"
    , label="T:hetLR"
    , align=c ("l", "l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabHetLR.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.3.4) Table A14: Ideology: (il)liberalism ####
## Polarized vs. close rows for the (il)liberalism ("IL") measure, with two label
## columns in front (sample, then polarized/close)
(tab_hetIL <- rd_results %>% 
    filter (dist_measure=="IL" & subset %in% c ("pola", "close") & covs=="none" & bwdselect=="mserd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_het, rows_dist, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.0cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Polarized (respectively, close) elections are those in which the absolute ideological distance along the (Il)liberalism dimension between the top-two vote getters in the first round was larger (smaller) than the median for each sample.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 12, note after row 24
addtorow <- list (
  pos=list (0, 0, 12, 24)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_hetIL
    , caption="{Heterogeneous effects: (Il)Liberalism ideological distance}"
    , label="T:hetIL"
    , align=c ("l", "l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabHetIL.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.3.5) Table A15: Ideology: post-materialism ####
## Polarized vs. close rows for the post-materialism ("PM") measure, with two label
## columns in front (sample, then polarized/close)
(tab_hetPM <- rd_results %>% 
    filter (dist_measure=="PM" & subset %in% c ("pola", "close") & covs=="none" & bwdselect=="mserd" & poly==1) %>% 
    select (est:n, empty, sd_c:pow_beta) %>% 
    cbind (rows_het, rows_dist, .))

## table note
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.0cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Polarized (respectively, close) elections are those in which the absolute ideological distance along the Post-Materialism dimension between the top-two vote getters in the first round was larger (smaller) than the median for each sample.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\"

## extra LaTeX rows: both headers before row 1, panel (b) heading after row 12, note after row 24
addtorow <- list (
  pos=list (0, 0, 12, 24)
  , command=c (Header1, Header2, Header3, Bottom1))

## write the table to disk
print (
  xtable (
    tab_hetPM
    , caption="{Heterogeneous effects: Post-Materialism ideological distance}"
    , label="T:hetPM"
    , align=c ("l", "l", "l", rep ("c", 10))
    , digits=2)
  , file="tables/tabHetPM.tex"
  , add.to.row=addtorow
  , hline.after=NULL
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity
  , size="footnotesize"
  , latex.environments="center"
  , caption.placement="top"
  , table.placement="t"
  , floating=TRUE
  )


### (4.3.6) Table A11: Visibility: Non-incumbent sample ####

## Row labels, an empty second label column, and the stored RD results for
## the "i_none" subset (no covariates, MSE-optimal bandwidth, local linear
## fit). The bare name on the last line prints the table.
tab_hetInc <- cbind (
  rows_het_inc,
  rep ("", length (rows_het_inc)),
  rd_results %>%
    filter (
      dist_measure == "wiki",
      subset == "i_none",
      covs == "none",
      bwdselect == "mserd",
      poly == 1
    ) %>%
    select (est:n, empty, sd_c:pow_beta)
)
tab_hetInc

## table footnote (raw LaTeX)
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.0cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to (i) elections requiring a runoff in which (ii) neither the first- nor the second-placed candidate in the first round was the incumbent at the time of the election.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\")

## `pos` holds, for each element of `command`, the row after which it is
## written (0 = before the first data row).
addtorow <- list (
  pos = list (0, 0, 6, 12),
  command = c (Header1, Header2b, Header3, Bottom1)
)
print (
  xtable (
    tab_hetInc,
    align = c (rep ("l", 3), rep ("c", 10)), ## one extra entry for the (hidden) row names
    digits = 2,
    caption = "{Heterogeneous effects: Open seat races}",
    label = "T:hetInc"
  ),
  sanitize.text.function = function (x) x, ## cells are written as-is (no escaping)
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = NULL, ## no automatic horizontal rules
  add.to.row = addtorow,
  file = "tables/tabHetInc.tex"
)


### (4.3.7) Table A12: Visibility: Previous experience (Brazil only) ####

## Row labels + the stored RD results for the four experience subsets,
## sorted by outcome, sample and subset before filtering (no covariates,
## MSE-optimal bandwidth, local linear fit). The bare name prints the table.
tab_hetExp <- cbind (
  rows_het_bra,
  rows_visib_bra,
  rd_results %>%
    arrange (outcome, sample2, subset) %>%
    filter (
      dist_measure == "wiki",
      subset %in% c ("e_none", "e_both", "e_first", "e_second"),
      covs == "none",
      bwdselect == "mserd",
      poly == 1
    ) %>%
    select (est:n, empty, sd_c:pow_beta)
)
tab_hetExp

## table footnote (raw LaTeX)
## NOTE(review): the footnote text reads "none of the was advantaged and
## disadvantaged" -- probably meant "none of them was advantaged or
## disadvantaged"; left untouched here, confirm with the authors.
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.0cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to subnational elections in Brazil requiring a runoff.
  The experience variable indicates whether the sample was restricted to elections where, respectively, none of the top-two vote getters in the first round had previous elected experience; neither or both had (i.e., none of the was advantaged and disadvantaged in this regard); only the first-placed had; or only the second-placed had, respectively.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\")

## `pos` holds, for each element of `command`, the row after which it is
## written (0 = before the first data row).
addtorow <- list (
  pos = list (0, 0, 8, 16),
  command = c (Header1, Header2c, Header3, Bottom1)
)
print (
  xtable (
    tab_hetExp,
    align = c (rep ("l", 3), rep ("c", 10)), ## one extra entry for the (hidden) row names
    digits = 2,
    caption = "{Heterogeneous effects: Previous experience (Brazil only)}",
    label = "T:hetExp"
  ),
  sanitize.text.function = function (x) x, ## cells are written as-is (no escaping)
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = NULL, ## no automatic horizontal rules
  add.to.row = addtorow,
  file = "tables/tabHetExp.tex"
)




###### (5) Balance and robustness ######

#### (5.1) Figure A16: Sensitivity to bandwidth choice ####

## objects used for looping and storage
outcomes <- c ("winner", "share_r2")
samples <- unique (as.character (bcand$sample2))
bwd_rows <- list () ## one character vector per (outcome, sample, bandwidth); bound once after the loop


## loop to get all the results ####
## For every outcome and sample, the RD model is re-estimated at 15 bandwidths:
## the CCT-optimal one (x0.5, x1, x2), the Imbens-Kalyanaraman one, and a
## manual grid from 4 to 34.
for (o in seq_along (outcomes)){
  
  ## selecting the outcomes of interest
  bcand$outcome_tmp <- unlist (bcand[,which (colnames (bcand) == outcomes[o])])
  
  
  ## selecting the sample(s) of interest
  for (s in seq_along (samples)){
    
    ## rows of the current sample (computed once; must come after outcome_tmp is set)
    dat_s <- filter (bcand, sample2==samples[s])
    
    
    ## creating the vectors of bandwidth values
    
    # CCT optimal
    cct <- with (dat_s, rdbwselect (
      y=outcome_tmp, x=score, bwselect="mserd", covs=NULL, cluster=ele_id, p=1, q=2))$bws
    
    # bwds for estimates
    bwd_h <- c (
      
      # CCT bandwidths
      cct[1]/2, cct[1], cct[1]*2
      
      # I-K bandwidth
      , IKbandwidth (Y=dat_s$outcome_tmp, X=dat_s$score, kernel="triangular")
      
      # other values
      , seq (4, 34, by=3))
    
    # bwds for bias -> estimated for CCT, 1.75 * bwd_h for the rest
    bwd_b <- c (cct[3]/2, cct[3], cct[3]*2, bwd_h[-c (1:3)]*1.75)
    
    
    ## selecting the bandwidth of interest
    for (b in seq_along (bwd_h)){
      
      ## looping over all bandwidth choices (h and b are supplied manually)
      rd_tmp <- with (
        dat_s, rdrobust (
          y=outcome_tmp, x=score, bwselect="mserd", h=bwd_h[b], b=bwd_b[b]
          , covs=NULL, cluster=ele_id, p=1, q=2))
      
      ## storing the values of interest; `b` is kept as the position in bwd_h
      bwd_rows[[length (bwd_rows) + 1]] <- c (
        samples[s], outcomes[o], b, extract_rd (rd_tmp))
      
    }}}

## stacking all rows into a single character matrix (NULL if nothing was estimated)
bwd_results <- do.call (rbind, bwd_rows)
## Turning the character matrix produced by the loop into a typed data frame.
bwd_results <- as.data.frame (bwd_results)
colnames (bwd_results) <- c ("sample2", "outcome", "bwd_type", "est", "ci", "pval", "bwd", "n")
bwd_results <- bwd_results %>% 
  mutate (
    sample2 = factor (sample2, levels=c (
      "full", "presi", "gover", "mayor", "brazil", "others"))
    , sample = case_when (
      sample2 == "full" ~ "full sample"
      , sample2 == "presi" ~ "presidential\nelections"
      , sample2 == "gover" ~ "gubernatorial\nelections"
      , sample2 == "mayor" ~ "mayoral\nelections"
      , sample2 == "brazil" ~ "subnational\n(Brazil)"
      , sample2 == "others" ~ "subnational\n(outside Brazil)")
    , sample2 = factor (sample, levels = c (
      "full sample", "presidential\nelections", "gubernatorial\nelections"
      , "mayoral\nelections", "subnational\n(Brazil)", "subnational\n(outside Brazil)"))
    ## The figure facets on `sample`, which used to stay a plain character
    ## vector (=> panels in alphabetical order). Giving it the same ordered
    ## levels as `sample2` makes the panels follow the intended order, as in
    ## the placebo plots of section (5.2.2). `sample2` itself is unchanged.
    , sample = sample2
    , outcome = case_when (
      outcome == "winner" ~ "DV: winner (0/100)"
      , outcome == "share_r2" ~ "DV: vote share R2 (0:100)"
      )
    , outcome = factor (outcome, levels = c (
      "DV: winner (0/100)", "DV: vote share R2 (0:100)"))
    ## bwd_type arrives as the position in the bandwidth vector (1-3 = CCT,
    ## 4 = I-K, the rest = manual grid) and is recoded into a labelled factor
    , bwd_type = as.numeric (bwd_type)
    , bwd_type = case_when (
      bwd_type %in% 1:3 ~ "CCT-optimal (1/2x, 1x, 2x)"
      , bwd_type == 4 ~ "Imbens-Kalyanaraman"
      , TRUE ~ "manual: 4 to 34")
    , bwd_type = factor (bwd_type, levels = c (
      "CCT-optimal (1/2x, 1x, 2x)", "Imbens-Kalyanaraman", "manual: 4 to 34"))
    , est = as.numeric (est)
    , pval = as.numeric (pval)
    , bwd = as.numeric (bwd)
    ## the CI is stored as "[low:high]"; strip the brackets and split on ":"
    , ci = str_replace (ci, "\\[", "")
    , ci = str_replace (ci, "\\]", "")
    , low95 = as.numeric (str_split_fixed (ci, pattern=":", n=2)[,1])
    , high95 = as.numeric (str_split_fixed (ci, pattern=":", n=2)[,2]) )


## drawing the plots

## per-outcome summaries of the CI bounds, to see the ranges of the axes
with (bwd_results, by (low95, outcome, summary))
with (bwd_results, by (high95, outcome, summary))

## point estimates with 95% CIs against the bandwidth used, one panel per
## outcome (rows) and sample (columns), coloured by how the bandwidth was chosen
bwdp_main <- ggplot (
  filter (bwd_results, outcome %in% c ("DV: winner (0/100)", "DV: vote share R2 (0:100)")),
  aes (x=bwd, y=est, color=bwd_type)
) +
  geom_hline (yintercept=0, col=col_cutoff) +
  geom_point () +
  geom_errorbar (aes (ymin=low95, ymax=high95)) +
  facet_grid (outcome ~ sample, scales="free_y") +
  scale_x_continuous (limits=c (0, 35), breaks=seq (0, 35, by=5)) +
  scale_color_manual (name="", values=c (col_movie[5], col_movie[4], col_movie[2])) +
  xlab (expression (bandwidth)) +
  ylab (expression (effect~size~"(in percentage points)")) +
  theme (
    strip.background=element_rect (fill=col_movie[3]),
    legend.position="bottom",
    legend.title=element_blank (),
    legend.box.margin=margin (-21,-9,-9,-9)
  )
bwdp_main


## exporting the plot (dimensions in cm)
pwid <- 17*2.05
phei <- 17*0.75
ggsave (
  "figures/figBwdPlotMain.png",
  bwdp_main,
  width=pwid, height=phei, units="cm"
)




#### (5.2) Placebo RD plots: Effect on ideology scores ####

## list of outcomes + objects to store the results
outcomes <- c ("wiki_lr", "ideoLR_fa", "ideoIL_fa", "ideoPM_fa"
               , "wiki_lr_na", "ideoLR_fa_na" ## missing values are (almost) identical for all V-Party ideology measures
               , "incParty", "incCandidate") 
samples <- unique (as.character (bcand$sample2))
placebo_pieces <- list () ## one data frame per (outcome, sample); bound once after the loop


### (5.2.1) loop to get all the plots ####
for (o in seq_along (outcomes)){
  
  ### selecting the outcomes of interest
  bcand$outcome_tmp <- unlist (bcand[,which (colnames (bcand) == outcomes[o])])
  
  
  ### selecting the sample(s) of interest
  for (s in seq_along (samples)){
    
    ### drawing the plots
    
    ## QSMV: quantile-spaced, mimicking variability -> better reflects the actual distribution of the data
    
    # full sample(s)
    qsmv_all <- with (filter (bcand, sample2==samples[s]), rdplot (
      y=outcome_tmp, x=score, p=3, binselect="qsmv", scale=1, kernel="uniform", x.lim=c (-50, 50)))
    
    
    ## exporting the data we'll use to build the plots manually later
    ## (the plot is built once; layers 1-3 are labelled as bins, left fit and right fit)
    qsmv_layers <- ggplot_build (qsmv_all$rdplot)$data
    
    ## NOTE(review): `d` is not set anywhere in this loop, so `dist_measure[d]`
    ## relies on whatever value an earlier section left behind. It only acts as
    ## a constant label here -- confirm that this is intended.
    qsmv_all_p <- bind_rows (
      qsmv_layers[[1]] %>% mutate (type="bin")
      , qsmv_layers[[2]] %>% mutate (type="ll")
      , qsmv_layers[[3]] %>% mutate (type="rl") ) %>% mutate (
        bin_select="qsmv", poly=3, subset="main", dist_measure=dist_measure[d], sample2=samples[s], outcome=outcomes[o] )
    
    ## exporting the values
    placebo_pieces[[length (placebo_pieces) + 1]] <- qsmv_all_p
    }}

## stacking the data of all plots
plot_results_placebo <- bind_rows (placebo_pieces)



### (5.2.2) updating some variables ####
## Adds plotting labels to the stacked plot data and, for every individual
## plot, the jump between the two fitted curves at the cutoff (x == 0).
plot_results_placebo2 <- plot_results_placebo %>%
  mutate (
    type = factor (type)
    , bin_select = factor (bin_select)
    , sample2 = factor (sample2)
    , subset = factor (subset)
    , outcome = factor (outcome, levels=outcomes)
    
    # renaming the samples
    , sample = case_when (
      sample2 == "full" ~ "full sample"
      , sample2 == "presi" ~ "presidential\nelections"
      , sample2 == "gover" ~ "gubernatorial\nelections"
      , sample2 == "mayor" ~ "mayoral\nelections"
      , sample2 == "brazil" ~ "subnational\n(Brazil)"
      , sample2 == "others" ~ "subnational\n(outside Brazil)")
    , sample = factor (sample, levels = c (
      "full sample", "presidential\nelections", "gubernatorial\nelections"
      , "mayoral\nelections", "subnational\n(Brazil)", "subnational\n(outside Brazil)"))
    
    # renaming the outcome variable
    , out_var = dplyr::recode (
      outcome
      , wiki_lr = "Left-Right\n(Wikipedia)"
      , ideoLR_fa = "Left-Right\n(V-Party)"
      , ideoIL_fa = "(Il)Liberalism\n(V-Party)"
      , ideoPM_fa = "Post-Materialism\n(V-Party)"
      , wiki_lr_na = "Missing (0/1)\n(Wikipedia)"
      , ideoLR_fa_na = "Missing (0/1)\n(V-Party)"
      , incParty = "Incumbent\nParty (0/1)"
      , incCandidate = "Incumbent\nCandidate (0/1)"
      )
    , out_var = factor (out_var, levels = c (
      "Left-Right\n(Wikipedia)", "Left-Right\n(V-Party)", "(Il)Liberalism\n(V-Party)", "Post-Materialism\n(V-Party)"
      , "Missing (0/1)\n(Wikipedia)", "Missing (0/1)\n(V-Party)", "Incumbent\nParty (0/1)", "Incumbent\nCandidate (0/1)"
      ))
    
    ) %>% group_by ( ## to identify each plot uniquely
    sample2, dist_measure, subset, outcome, bin_select, poly) %>% 
  mutate (
    ## value of the left ("ll") and right ("rl") fitted curves at the cutoff;
    ## max() just picks the single non-missing value within each plot.
    ## TRUE is spelled out because `T` is an ordinary, reassignable variable.
    y_below = ifelse (type=="ll" & x==0, y, NA)
    , y_below = max (y_below, na.rm=TRUE)
    , y_above = ifelse (type=="rl" & x==0, y, NA)
    , y_above = max (y_above, na.rm=TRUE)
    , rd_estim = y_above - y_below
    , rd_estim_text = str_c ("\u03c4 = ", sprintf ("%.2f", round (rd_estim, 2)), "pp.") ## tau
    
    ## dropping the ggplot-internal columns that are not needed any more
    , PANEL = NULL
    , group = NULL
    , shape = NULL
    , colour = NULL
    , size = NULL
    , fill = NULL
    , alpha = NULL
    , stroke = NULL
    , flipped_aes = NULL
    , linetype = NULL ) %>% ## warning reported. Don't worry
  ungroup () ## the grouping was only needed for the per-plot jump; do not leak it downstream
summary (plot_results_placebo2)



### (5.2.3) Drawing the plots ####

## (5.2.3.1) Figure A14: Ideology scores, QSMV

## all rows of the four ideology panels (main subset, QSMV bins, cubic fit);
## the layers below only differ in the `type` they pick from this object
ideo_panels <- c ("Left-Right\n(Wikipedia)", "Left-Right\n(V-Party)", "(Il)Liberalism\n(V-Party)", "Post-Materialism\n(V-Party)")
ideo_rows <- filter (plot_results_placebo2, out_var %in% ideo_panels, subset=="main", bin_select=="qsmv", poly==3)

range (filter (ideo_rows, type=="bin")$y) ## -0.64 to 1.17

## binned means (points), the two fitted curves (lines) and the jump at the
## cutoff (text), one panel per outcome (rows) and sample (columns)
rdp51_ideo_qsmv3 <- ggplot (filter (ideo_rows, type=="bin"), aes (x=x, y=y)) +
  geom_vline (xintercept=0, col=col_cutoff) +
  geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
  geom_line (data=filter (ideo_rows, type=="ll"), aes (x=x, y=y), col=col_line) +
  geom_line (data=filter (ideo_rows, type=="rl"), aes (x=x, y=y), col=col_line) +
  facet_grid (out_var ~ sample) +
  geom_text (
    data=ideo_rows %>% group_by (sample, out_var) %>% summarise (rd_estim_text=unique (rd_estim_text)),
    mapping=aes (x=place_text_x_placebo, y=place_text_y_placebo, label=rd_estim_text),
    size=size_text
  ) +
  scale_x_continuous (limits=c (-40, 40), breaks=seq (-50, 50, by=10)) +
  scale_y_continuous (limits=c (-0.65, 1.25), breaks=seq (-2, 2, by=0.25)) +
  xlab (expression (first~round~margin~"(%)")) +
  ylab (expression (ideology)) +
  theme (
    strip.background=element_rect (fill=col_movie[3]),
    axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0))
  )
rdp51_ideo_qsmv3


## (5.2.3.2) Figure A15: Remaining variables (missing + incumbency), QSMV

## all rows of the four dummy-outcome panels (main subset, QSMV bins, cubic
## fit); the layers below only differ in the `type` they pick from this object
rest_panels <- c ("Missing (0/1)\n(Wikipedia)", "Missing (0/1)\n(V-Party)", "Incumbent\nParty (0/1)", "Incumbent\nCandidate (0/1)")
rest_rows <- filter (plot_results_placebo2, out_var %in% rest_panels, subset=="main", bin_select=="qsmv", poly==3)

range (filter (rest_rows, type=="bin")$y) ## 0 to 0.7

## binned means (points), the two fitted curves (lines) and the jump at the
## cutoff (text), one panel per outcome (rows) and sample (columns)
rdp52_rest_qsmv3 <- ggplot (filter (rest_rows, type=="bin"), aes (x=x, y=y)) +
  geom_vline (xintercept=0, col=col_cutoff) +
  geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
  geom_line (data=filter (rest_rows, type=="ll"), aes (x=x, y=y), col=col_line) +
  geom_line (data=filter (rest_rows, type=="rl"), aes (x=x, y=y), col=col_line) +
  facet_grid (out_var ~ sample) +
  geom_text (
    data=rest_rows %>% group_by (sample, out_var) %>% summarise (rd_estim_text=unique (rd_estim_text)),
    mapping=aes (x=place_text_x_placebo, y=place_text_y_placebo2, label=rd_estim_text),
    size=size_text
  ) +
  scale_x_continuous (limits=c (-40, 40), breaks=seq (-50, 50, by=10)) +
  scale_y_continuous (limits=c (0, 0.7), breaks=seq (0, 0.7, by=0.1)) +
  xlab (expression (first~round~margin~"(%)")) +
  ylab ("") +
  theme (
    strip.background=element_rect (fill=col_movie[3]),
    axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0))
  )
rdp52_rest_qsmv3



### (5.2.4) exporting
## both placebo figures are saved with the same width and height (in cm)
pwid <- 17*2.5
phei <- 17*1.45
ggsave (
  filename="figures/figRDPlaceboIdeo.png", plot=rdp51_ideo_qsmv3,
  width=pwid, height=phei, units="cm"
)
ggsave (
  filename="figures/figRDPlaceboOth.png", plot=rdp52_rest_qsmv3,
  width=pwid, height=phei, units="cm"
)




#### (5.3) Placebo: RD models ####

### (5.3.1) Getting the values ####

## list of outcomes + objects to store the results
outcomes <- c ("wiki_lr", "ideoLR_fa", "ideoIL_fa", "ideoPM_fa"
               , "wiki_lr_na", "ideoLR_fa_na" ## missing values are (almost) identical for all V-Party ideology measures
               , "incParty", "incCandidate") 
samples <- unique (as.character (bcand$sample2))
placebo_rows <- list () ## one character vector per estimated model; bound once after the loop


## loop to get all the results ####
## For every outcome x sample x bandwidth selector x polynomial degree, the
## loop stores the RD estimate, the SD of the outcome among control units
## inside the bandwidth, and the power against three effect sizes.
for (o in seq_along (outcomes)){
  
  ## selecting the outcomes of interest
  bcand$outcome_tmp <- unlist (bcand[,which (colnames (bcand) == outcomes[o])])
  
  ## selecting the sample(s) of interest
  for (s in seq_along (samples)){
    
    ## rows of the current sample, and the (outcome, score) pair rdpower() expects
    ## (computed once per sample; must come after outcome_tmp is set)
    dat_s <- filter (bcand, sample2==samples[s])
    pow_data <- as.data.frame (dat_s %>% select (outcome_tmp, score))
    
    ## selecting the bandwidth selection procedure
    for (b in c ("mserd", "cerrd")) {
      
      ## selecting the polynomial degree
      for (p in 1:2) {
        
        
        ## estimating the model(s)
        
        # RD estimate
        rd_main <- with (
          dat_s, rdrobust (
            y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
        
        # SD of outcome in the control group within (conventional) RD bandwidth
        sd_main_c <- sd (filter (dat_s, score<0 & abs(score)<=rd_main$bws[1,1])$outcome_tmp, na.rm=TRUE)
        
        # power against (1) one control-group SD, (2) half of it, (3) the RD estimate itself.
        # The polynomial order now follows the loop (p=p); it used to be fixed at
        # p=1 while q=p+1 varied, so the quadratic models were evaluated with
        # p=1, q=3. Rows with poly==1 are unaffected.
        pow_main01 <- rdpower (data=pow_data
          , cutoff=0, alpha=0.05, level=95, p=p, q=p+1, covs=NULL, plot=FALSE
          , tau=sd_main_c
          , cluster=dat_s$ele_id)
        pow_main02 <- rdpower (data=pow_data
          , cutoff=0, alpha=0.05, level=95, p=p, q=p+1, covs=NULL, plot=FALSE
          , tau=sd_main_c/2
          , cluster=dat_s$ele_id)
        pow_main03 <- rdpower (data=pow_data
          , cutoff=0, alpha=0.05, level=95, p=p, q=p+1, covs=NULL, plot=FALSE
          , tau=rd_main$Estimate[1]
          , cluster=dat_s$ele_id)
        
        
        ## storing the values of interest
        placebo_rows[[length (placebo_rows) + 1]] <- c (
          samples[s], "main", "none", outcomes[o], b, p, extract_rd (rd_main)
          , sprintf ("%.2f", round (sd_main_c, 2))
          , sprintf ("%.2f", round (pow_main01$power.rbc, 2))
          , sprintf ("%.2f", round (pow_main02$power.rbc, 2))
          , sprintf ("%.2f", round (pow_main03$power.rbc, 2)))
        }}}}

## stacking all rows into a single character matrix (NULL if nothing was estimated)
rd_results_placebo <- do.call (rbind, placebo_rows)
## Turning the character matrix produced by the loop into a data frame with
## named columns, ordered factors for outcome / sample / bandwidth selector,
## a numeric polynomial degree and an empty spacer column for the tables.
placebo_cols <- c (
  "sample2", "subset", "covs", "outcome", "bwdselect", "poly",
  "est", "ci", "pval", "bwd", "n",
  "sd_c", "pow_sd", "pow_sd2", "pow_beta"
)
rd_results_placebo <- setNames (as.data.frame (rd_results_placebo), placebo_cols) %>%
  mutate (
    outcome = factor (outcome, levels=outcomes),
    sample2 = factor (sample2, levels=c ("full", "presi", "gover", "mayor", "brazil", "others")),
    subset = factor (subset),
    bwdselect = factor (bwdselect, levels=c ("mserd", "cerrd")),
    poly = as.numeric (poly),
    empty = ""
  ) %>%
  arrange (outcome, sample2)

## quick look at the result
summary (rd_results_placebo)
head (rd_results_placebo, 25)



### (5.3.2) Headers and footers ####

## row labels: the six samples, repeated once per panel (four panels per table)
rows_placebo <- rep (c ("full sample", "presidential", "gubernatorial", "mayoral", "subnational (Brazil)", "subnational ($\\neg$ Brazil)"), 4)

## headers and footers
## all_of() makes explicit that `outcomes` is an external character vector of
## column names (a bare vector inside select() is deprecated and ambiguous)
summary (select (bcand, all_of (outcomes))) ## to get the range of the variables

## Raw LaTeX fragments inserted by the two placebo tables below.
## Header1: top rule + the "power against" spanner over columns 9-11.
Header1 <- "\\toprule & & & & & & & & \\multicolumn{3}{c}{power against}\\\\ \\cmidrule{9-11} \n"

## Header2-Header5: panel titles (a)-(d) of the ideology-score table;
## Header2 also carries the column labels.
Header2 <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{Left-Right (Wikipedia)} (-2.3:1.9)} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & 
\\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
Header3 <- "[2.0ex] \\multicolumn{11}{l}{(b) \\textsc{dv}: \\emph{Left-Right (\\textsc{v}-\\textsc{p}arty)} (-3.4:3.5)} \\\\ \\midrule \n"
Header4 <- "[2.0ex] \\multicolumn{11}{l}{(c) \\textsc{dv}: \\emph{(Il)Liberalism (\\textsc{v}-\\textsc{p}arty)} (-2.5:1.3)} \\\\ \\midrule \n"
Header5 <- "[2.0ex] \\multicolumn{11}{l}{(d) \\textsc{dv}: \\emph{Post-Materialism (\\textsc{v}-\\textsc{p}arty)} (-2.2:2.6)} \\\\ \\midrule \n"

## Header6-Header9: panel titles (a)-(d) of the dummy-outcome table;
## Header6 also carries the column labels.
Header6 <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{Missing Wikipedia scores} (0/1)} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & 
\\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n"
Header7 <- "[2.0ex] \\multicolumn{11}{l}{(b) \\textsc{dv}: \\emph{Missing \\textsc{v}-\\textsc{p}arty scores} (0/1)} \\\\ \\midrule \n"
Header8 <- "[2.0ex] \\multicolumn{11}{l}{(c) \\textsc{dv}: \\emph{Incumbent Party} (0/1)} \\\\ \\midrule \n"
Header9 <- "[2.0ex] \\multicolumn{11}{l}{(d) \\textsc{dv}: \\emph{Incumbent Candidate} (0/1)} \\\\ \\midrule \n"

## Bottom1: bottom rule + the footnote shared by both placebo tables.
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{15.25cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}.
  The running variable is \\emph{first round margin}.
  Samples are restricted to elections requiring a runoff.
  Observations are clustered by election.
  The estimates are calculated by fitting a separate local linear regression at both sides of the threshold, using a triangular kernel.
  Reported number of observations indicate the \\emph{effective} sample size.
  \\end{minipage}}\\\\"


### (5.3.3) Table A4: Placebo (I): Ideology scores ####

## Row labels + the stored placebo results for the four ideology scores
## (main subset, no covariates, MSE-optimal bandwidth, local linear fit).
## The bare name on the last line prints the table.
tab_placebo_ideo <- cbind (
  rows_placebo,
  rd_results_placebo %>%
    filter (
      subset == "main",
      covs == "none",
      bwdselect == "mserd",
      poly == 1,
      outcome %in% c ("wiki_lr", "ideoLR_fa", "ideoIL_fa", "ideoPM_fa")
    ) %>%
    select (est:n, empty, sd_c:pow_beta)
)
tab_placebo_ideo

## `pos` holds, for each element of `command`, the row after which it is
## written: both header lines go before the first data row, a panel title
## after rows 6, 12 and 18, and the footnote after row 24.
addtorow <- list (
  pos = list (0, 0, 6, 12, 18, 24),
  command = c (Header1, Header2, Header3, Header4, Header5, Bottom1)
)
print (
  xtable (
    tab_placebo_ideo,
    align = c (rep ("l", 2), rep ("c", 10)), ## one extra entry for the (hidden) row names
    digits = 2,
    caption = "{\\textsc{rd} estimates: Placebo outcomes (ideology scores)}",
    label = "T:placeboIdeo"
  ),
  sanitize.text.function = function (x) x, ## cells are written as-is (no escaping)
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = NULL, ## no automatic horizontal rules
  add.to.row = addtorow,
  file = "tables/tabPlaceboIdeo.tex"
)



### (5.3.4) Table A5: Placebo (II): Other outcomes (dummies) ####

## Row labels + the stored placebo results for the four dummy outcomes
## (main subset, no covariates, MSE-optimal bandwidth, local linear fit).
## The bare name on the last line prints the table.
tab_placebo_dum <- cbind (
  rows_placebo,
  rd_results_placebo %>%
    filter (
      subset == "main",
      covs == "none",
      bwdselect == "mserd",
      poly == 1,
      outcome %in% c ("wiki_lr_na", "ideoLR_fa_na", "incParty", "incCandidate")
    ) %>%
    select (est:n, empty, sd_c:pow_beta)
)
tab_placebo_dum

## `pos` holds, for each element of `command`, the row after which it is
## written: both header lines go before the first data row, a panel title
## after rows 6, 12 and 18, and the footnote after row 24.
addtorow <- list (
  pos = list (0, 0, 6, 12, 18, 24),
  command = c (Header1, Header6, Header7, Header8, Header9, Bottom1)
)
print (
  xtable (
    tab_placebo_dum,
    align = c (rep ("l", 2), rep ("c", 10)), ## one extra entry for the (hidden) row names
    digits = 2,
    caption = "{\\textsc{rd} estimates: Placebo outcomes (missingness in ideology scores and incumbency status)}",
    label = "T:placeboOth"
  ),
  sanitize.text.function = function (x) x, ## cells are written as-is (no escaping)
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = NULL, ## no automatic horizontal rules
  add.to.row = addtorow,
  file = "tables/tabPlaceboOth.tex"
)




#### (5.4) Table A6: Randomized inference ####

### (5.4.1) Estimating the models

## Runs rdrandinf() for one outcome in one sample.
##   outcome      name (string) of the column of bcand used as Y
##   which_sample value of bcand$sample2 identifying the sample
## Rows with a missing wiki_lr_dist12_ntile2 are dropped; the running
## variable is `score`; wiki_lr, incParty and incCandidate are passed as
## covariates; the window settings, number of replications and seed are the
## same for every model.
fit_rdloc <- function (outcome, which_sample) {
  dat <- filter (bcand, !is.na (wiki_lr_dist12_ntile2) & sample2==which_sample)
  rdrandinf (
    Y=dat[[outcome]], R=dat$score, wmin=0.25, wstep=0.1,
    covariates=as.matrix (select (dat, wiki_lr, incParty, incCandidate)),
    statistic="all", reps=reps_dlocrand, seed=20071202
  )
}


## winner
rdloc_win_full <- fit_rdloc ("winner", "full")
rdloc_win_presi <- fit_rdloc ("winner", "presi")
rdloc_win_gover <- fit_rdloc ("winner", "gover")
rdloc_win_mayor <- fit_rdloc ("winner", "mayor")
rdloc_win_brazil <- fit_rdloc ("winner", "brazil")
rdloc_win_others <- fit_rdloc ("winner", "others")


## vote share R2
rdloc_sh2_full <- fit_rdloc ("share_r2", "full")
rdloc_sh2_presi <- fit_rdloc ("share_r2", "presi")
rdloc_sh2_gover <- fit_rdloc ("share_r2", "gover")
rdloc_sh2_mayor <- fit_rdloc ("share_r2", "mayor")
rdloc_sh2_brazil <- fit_rdloc ("share_r2", "brazil")
rdloc_sh2_others <- fit_rdloc ("share_r2", "others")


### (5.4.2) Exporting the table

# getting the values
## row labels: the six samples, once per outcome
rows_dlocrand <- rep (
  c ("full sample", "presidential", "gubernatorial", "mayoral", "subnational (Brazil)", "subnational ($\\neg$ Brazil)"),
  times = 2
)

tab_dlocrand <- rbind (
  c (sprintf ("%.2f", c (c (rdloc_win_full$obs.stat, rdloc_win_full$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_win_full$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_win_full$sumstats[2,1]), sprintf ("%.0f", rdloc_win_full$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_win_presi$obs.stat, rdloc_win_presi$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_win_presi$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_win_presi$sumstats[2,1]), sprintf ("%.0f", rdloc_win_presi$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_win_gover$obs.stat, rdloc_win_gover$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_win_gover$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_win_gover$sumstats[2,1]), sprintf ("%.0f", rdloc_win_gover$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_win_mayor$obs.stat, rdloc_win_mayor$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_win_mayor$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_win_mayor$sumstats[2,1]), sprintf ("%.0f", rdloc_win_mayor$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_win_brazil$obs.stat, rdloc_win_brazil$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_win_brazil$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_win_brazil$sumstats[2,1]), sprintf ("%.0f", rdloc_win_brazil$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_win_others$obs.stat, rdloc_win_others$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_win_others$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_win_others$sumstats[2,1]), sprintf ("%.0f", rdloc_win_others$sumstats[2,1]), sep="$|$"))
  
  , c (sprintf ("%.2f", c (c (rdloc_sh2_full$obs.stat, rdloc_sh2_full$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_sh2_full$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_sh2_full$sumstats[2,1]), sprintf ("%.0f", rdloc_sh2_full$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_sh2_presi$obs.stat, rdloc_sh2_presi$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_sh2_presi$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_sh2_presi$sumstats[2,1]), sprintf ("%.0f", rdloc_sh2_presi$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_sh2_gover$obs.stat, rdloc_sh2_gover$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_sh2_gover$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_sh2_gover$sumstats[2,1]), sprintf ("%.0f", rdloc_sh2_gover$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_sh2_mayor$obs.stat, rdloc_sh2_mayor$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_sh2_mayor$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_sh2_mayor$sumstats[2,1]), sprintf ("%.0f", rdloc_sh2_mayor$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_sh2_brazil$obs.stat, rdloc_sh2_brazil$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_sh2_brazil$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_sh2_brazil$sumstats[2,1]), sprintf ("%.0f", rdloc_sh2_brazil$sumstats[2,1]), sep="$|$"))
  , c (sprintf ("%.2f", c (c (rdloc_sh2_others$obs.stat, rdloc_sh2_others$p.value)[c (1, 4, 2, 5, 3, 6)], rdloc_sh2_others$window[2]) %>% round (2)), str_c (sprintf ("%.0f", rdloc_sh2_others$sumstats[2,1]), sprintf ("%.0f", rdloc_sh2_others$sumstats[2,1]), sep="$|$")))
(tab_dlocrand <- cbind (
  rows_dlocrand
  , tab_dlocrand[,1:2]
  , rep ("", nrow (tab_dlocrand))
  , tab_dlocrand[,3:4]
  , rep ("", nrow (tab_dlocrand))
  , tab_dlocrand[,5:8]
  ))
tab_dlocrand[c (2, 6, 8, 12),-1] <- ""
tab_dlocrand

# headers and footers
## LaTeX fragments injected into the table: two header lines, the panel (b) title, and the table note
Header1 <- "\\toprule & \\multicolumn{2}{c}{diff. in means} & & \\multicolumn{2}{c}{\\textsc{k}-\\textsc{s}$^{*}$} & & \\multicolumn{2}{c}{rank sum} & & \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \n"
Header2 <- "\\multicolumn{1}{l}{(a) \\textsc{dv}: \\emph{winner} (0/100)} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{$p$-val.} & & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{$p$-val.} & & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} \\\\ \\midrule \n"
Header3 <- "[2.0ex] \\multicolumn{11}{l}{(b) \\textsc{dv}: \\emph{vote share}$_{\\textsc{r}2}$ (0:100)} \\\\ \\midrule \n"
Bottom1 <- "\\bottomrule \\multicolumn{11}{l}{
  \\begin{minipage}{12.5cm}~\\\\
  \\footnotesize Sharp local randomization \\textsc{rd} estimates, calculated following the procedure proposed by \\citeA{cattaneo_etal2016b}.
  Only samples with at least 10 observations at each side of the threshold are included.
  Exact $p$-values based on 10,000 permutations.
  The running variable is \\emph{first round margin}.
  The covariates used to determine balance are Left-Right ideology as measured with Wikipedia tags (\\citealtA{herrmann_doring2022}) as well as partisan and individual incumbency status.
  Samples are restricted to elections requiring a runoff.
  Reported number of observations indicate the \\emph{effective} sample size.
  ($^{*}$) Kolmogorov-Smirnov statistic.
  \\end{minipage}}\\\\"

## where each fragment goes: both headers before the first row, the panel (b) title after row 6, the note after row 12
addtorow <- list (
  pos = list (0, 0, 6, 12)
  , command = c (Header1, Header2, Header3, Bottom1))

## writing the table to disk; cell contents are passed through untouched because they already contain LaTeX
print (
  xtable (
    tab_dlocrand
    , align=c ("l", "l", rep ("c", 10))
    , digits=2
    , caption="{\\textsc{rd} estimates: Local randomization approach}"
    , label="T:localRand")
  , sanitize.text.function=function (x) x
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center"
  , size="footnotesize"
  , include.colnames=FALSE
  , include.rownames=FALSE
  , hline.after=NULL
  , add.to.row=addtorow
  , file="tables/tabRobLocRand.tex")



#### (5.5) Figure A17: Addressing the possibility of mirroring ####
## WARNING: the loop takes a lot of time to run!

### (5.5.1) Sampling ####

## election IDs in the full sample + simulation parameters
index <- bcand %>% filter (sample2=="full") %>% pull (ele_id) %>% unique ()
length (index)
set.seed (19831025) ## for reproducibility
sims <- 500 ## number of simulations
vals <- runif (sims*length (index)*2) ## two uniform draws per election-simulation pair


## The first half of vals holds the draws for first-placed candidates, the second half those for second-placed ones.
## Within each election-simulation pair, the candidate with the higher draw gets a 1 and the other a 0.
## Matrices are filled column-wise, so each column is one simulation and each row one election (in the order of index).
index2 <- local ({
  n_half <- length (vals)/2
  draw_first <- vals[seq (1, n_half, by=1)]
  draw_second <- vals[seq (n_half+1, length (vals), by=1)]
  picks <- as.data.frame (rbind (
    matrix (as.numeric (draw_first > draw_second), ncol=sims)   ## rows for first-placed candidates
    , matrix (as.numeric (draw_first < draw_second), ncol=sims) ## rows for second-placed candidates
  ))
  out <- as.data.frame (bind_cols (
    rep (as.character (index), 2)
    , rep (c (1, 2), each=length (index))
    , picks
  ))
  colnames (out) <- c ("ele_id", "rank_r1", str_c ("sim", 1:sims, sep="_"))
  out
})
summary (index2)


## joining and pivoting longer -> one row per candidate-simulation combination, which we'll then use for the loop
bcand_sim <- bcand %>% 
  left_join (index2, by=c ("ele_id", "rank_r1")) %>% 
  pivot_longer (
    cols = colnames (index2)[-(1:2)]
    , names_to = "sim_n"
    , values_to = "select") %>% 
  mutate (sim_n = factor (sim_n))
nrow (bcand)*sims == nrow (bcand_sim)  ## these must be the same
summary (bcand_sim)



### (5.5.2) Getting the estimates ####

## list of outcomes + objects to store the results
outcomes <- c ("winner", "share_r2")
samples <- unique (as.character (bcand_sim$sample2))
sim_n <- unique (as.character (bcand_sim$sim_n))

## model specification (the same for every run): bandwidth selector and polynomial order
b <- "mserd"
p <- 1

## results are collected in a preallocated list and bound once at the end, instead of growing a matrix with rbind ()
mirror_rows <- vector ("list", length (outcomes)*length (samples)*sims)
mirror_k <- 0


## loop to get all the results ####
## For every outcome-sample-simulation combination, keep only the randomly selected candidate of each election
## (select==1) and estimate the RD model; one row of formatted results is stored per model.
for (o in seq_along (outcomes)){
  
  ## selecting the outcomes of interest
  bcand_sim$outcome_tmp <- bcand_sim[[outcomes[o]]]
  
  ## selecting the sample(s) of interest
  for (s in seq_along (samples)){
    
    ## the sample/selection filter does not depend on the simulation, so it is done once per sample
    mirror_sample_id <- samples[s]
    mirror_sample_data <- filter (bcand_sim, sample2==mirror_sample_id, select==1)
    
    ## selecting the simulation
    for (r in seq_len (sims)) {
      
      ## NOTE: the simulation label is copied into a separately named object on purpose. Inside filter (), the name
      ## sim_n refers to the data column, so `sim_n==sim_n[r]` would compare the column against its own r-th row
      ## rather than against the r-th label of the vector defined above.
      mirror_sim_id <- sim_n[r]
      
      ## estimating the model(s)
      rd_main <- with (
        filter (mirror_sample_data, sim_n==mirror_sim_id)
        , rdrobust (
          y=outcome_tmp, x=score, bwselect=b, covs=NULL, cluster=ele_id, p=p, q=p+1))
      
      ## storing everything
      mirror_k <- mirror_k + 1
      mirror_rows[[mirror_k]] <- c (
        samples[s], r,  "main", "none", outcomes[o], b, p, extract_rd (rd_main))
    }}}

## character matrix with one row per model (NULL if nothing was estimated)
rd_results_mirror <- do.call (rbind, mirror_rows)



### (5.5.3) Formatting the data for the plots ####
## the loop returns a character matrix: naming the columns and converting to the right types
rd_results_mirror <- as.data.frame (rd_results_mirror)
colnames (rd_results_mirror) <- c ("sample2", "sim_n", "subset", "covs", "outcome", "bwdselect", "poly", "est", "ci", "pval", "bwd", "n")
rd_results_mirror2 <- rd_results_mirror %>% 
  mutate (
    outcome = factor (outcome, levels=outcomes)
    , sample2 = factor (sample2, levels=c (
      "full", "presi", "gover", "mayor", "brazil", "others"))
    , subset = factor (subset)
    , sim_n = as.numeric (sim_n)
    , poly = as.numeric (poly)
    , est = as.numeric (est)
    , pval = as.numeric (pval)
    , low95_sim = NA  ## placeholders, filled in below once the CI strings are parsed
    , high95_sim = NA
    , significant05 = ifelse (
      pval <= 0.05, "significant", "not significant")
    , significant05 = factor (significant05, levels=c (
      "significant", "not significant")) ) %>% 
  arrange (outcome, sample2, subset, covs, bwdselect, poly, desc (est), pval) %>% 
  ## ranking the simulated estimates (largest first) within each sample-outcome-specification cell
  group_by (sample2, outcome, subset, covs, bwdselect, poly) %>% 
  mutate (
    est_rank = rank (-est, ties.method="first")) %>% 
  ## dropping the grouping: it is only needed for the ranking, and would otherwise carry over to the join,
  ## the CI parsing below, and the data passed to the plots
  ungroup ()


## adding estimates reported in Table 2, which we'll need for the comparison
## (after the join, *_sim columns hold the simulated values, and est/ci the values taken from rd_results)
rd_results_mirror2 <- rd_results_mirror2 %>% left_join (
  rd_results %>% filter (dist_measure=="LR" & subset=="main") %>% select (sample2, subset, covs, outcome, bwdselect, poly, est, ci)
  , by=c ("sample2", "subset", "covs", "outcome", "bwdselect", "poly")
  , suffix=c ("_sim", "") ) %>% 
  mutate (
    ## labels for the plot facets
    sample = case_when (
      sample2 == "full" ~ "full sample"
      , sample2 == "presi" ~ "presidential elections"
      , sample2 == "gover" ~ "gubernatorial elections"
      , sample2 == "mayor" ~ "mayoral elections"
      , sample2 == "brazil" ~ "subnational (Brazil)"
      , sample2 == "others" ~ "subnational (outside Brazil)")
    , sample = factor (sample, levels = c (
      "full sample", "presidential elections", "gubernatorial elections"
      , "mayoral elections", "subnational (Brazil)", "subnational (outside Brazil)"))
    , est = as.numeric (est)
    ## confidence intervals are stored as "[low:high]" strings: strip the brackets, then split at the colon
    , ci_sim = str_remove_all (ci_sim, "[\\[\\]]")
    , ci = str_remove_all (ci, "[\\[\\]]")
    , low95_sim = as.numeric (str_split_fixed (as.character (ci_sim), pattern=":", n=2)[,1])
    , high95_sim = as.numeric (str_split_fixed (as.character (ci_sim), pattern=":", n=2)[,2])
    , low95 = as.numeric (str_split_fixed (as.character (ci), pattern=":", n=2)[,1])
    , high95 = as.numeric (str_split_fixed (as.character (ci), pattern=":", n=2)[,2]))
summary (rd_results_mirror2)



### (5.5.4) Drawing the plots ####

## winner as DV
## simulated estimates (points) and their 95% CIs (error bars), ordered by rank, one panel per sample;
## horizontal lines mark zero and the estimate + CI bounds joined from rd_results
(mirrorp_winner <- ggplot (
  data = filter (rd_results_mirror2, outcome=="winner")
  , mapping = aes (x=est_rank, y=est_sim)) +
  geom_hline (yintercept=0, col=gray_lines) +
  geom_hline (aes (yintercept=est), color=col_movie[5]) +
  geom_hline (aes (yintercept=low95), color=col_movie[5], linetype=2) +
  geom_hline (aes (yintercept=high95), color=col_movie[5], linetype=2) +
  geom_point (col=col_movie[1], size=0.1, alpha=alpha_dots) +
  geom_errorbar (
    aes (ymin=low95_sim, ymax=high95_sim)
    , col=col_movie[1], width=0.025, size=0.1, alpha=alpha_dots) +
  facet_wrap ( ~ sample, ncol=3) +
  scale_x_continuous (limits=c (0, sims), breaks=seq (0, sims, by=50)) +
  scale_y_continuous (breaks=seq (-80, 120, by=20)) +
  coord_cartesian (ylim = c (-80, 125)) +
  xlab (expression (estimate~rank)) +
  ylab (expression (RD~estimate~"(in percentage points)")) +
  theme (
    strip.background=element_rect (fill=col_movie[3])
    , legend.position="bottom"
    , legend.title=element_blank ()
    , legend.box.margin=margin (-21,-9,-9,-9)) )


## share_r2 as DV
## same layout as the plot for winner; only the data subset and the y-axis breaks/limits differ
(mirrorp_share <- ggplot (
  data = filter (rd_results_mirror2, outcome=="share_r2")
  , mapping = aes (x=est_rank, y=est_sim)) +
  geom_hline (yintercept=0, col=gray_lines) +
  geom_hline (aes (yintercept=est), color=col_movie[5]) +
  geom_hline (aes (yintercept=low95), color=col_movie[5], linetype=2) +
  geom_hline (aes (yintercept=high95), color=col_movie[5], linetype=2) +
  geom_point (col=col_movie[1], size=0.1, alpha=alpha_dots) +
  geom_errorbar (
    aes (ymin=low95_sim, ymax=high95_sim)
    , col=col_movie[1], width=0.025, size=0.1, alpha=alpha_dots) +
  facet_wrap ( ~ sample, ncol=3) +
  scale_x_continuous (limits=c (0, sims), breaks=seq (0, sims, by=50)) +
  scale_y_continuous (breaks=seq (-20, 30, by=5)) +
  coord_cartesian (ylim = c (-15, 30)) +
  xlab (expression (estimate~rank)) +
  ylab (expression (RD~estimate~"(in percentage points)")) +
  theme (
    strip.background=element_rect (fill=col_movie[3])
    , legend.position="bottom"
    , legend.title=element_blank ()
    , legend.box.margin=margin (-21,-9,-9,-9)) )



### (5.5.5) Exporting ####
## figure dimensions (in cm), shared by both plots
pwid <- 17*1.8
phei <- 17*0.95

## saving both figures as PNG files
ggsave (
  filename="figures/figMirrorWinner.png"
  , plot=mirrorp_winner
  , width=pwid, height=phei, units="cm")
ggsave (
  filename="figures/figMirrorShare.png"
  , plot=mirrorp_share
  , width=pwid, height=phei, units="cm")




#### (5.6) Exporting everything --> if you want to access the results later, uncomment and run this code ####

# setwd (home)
# save.image ("Results Second Rounds 2023-01-19.RData")
# load ("Results Second Rounds 2023-01-19.RData")

## closing the log file: ends the diversion of output started with sink () at the top of the script
sink (NULL)
